Overview

The purpose of this markdown is to report clinical accuracy by displaying the Error Grid analysis, computing logistic regression on “well-matched” pairs, calculating Cohen’s Kappa, and reporting our “transfusion test” results.

Initialize

First we load the necessary packages:

suppressPackageStartupMessages({
  
  # Data frame manipulation
  require(dplyr)
  
  # Graphics and output
  require(ggplot2)

  # Tables
  require(knitr)
  require(kableExtra)
  
  # Error grid point allocation
  require(ptinpoly)

})

Ensure the required environment variables are specified:

# Abort early when any of the required environment variables is unset/empty
if (any(Sys.getenv(c('PICU_LAB_DATA_PATH',
                     'PICU_LAB_IMG_PATH',
                     'PICU_LAB_IN_FILE',
                     'PICU_LAB_SITE_NAME',
                     'PICU_LAB_RUN_DATE')) == '')) {
  stop('Missing necessary environmental variables - see README.md')
}

# Report which site this run belongs to
cat(sprintf('Site: %s\n', Sys.getenv('PICU_LAB_SITE_NAME')))
## Site: CHOP

Specify the run date:

# The run date is supplied through the environment
run.date <- Sys.getenv('PICU_LAB_RUN_DATE')

cat('Run Date: ', run.date, '\n', sep = '')
## Run Date: 2022-12-22

Data Input

Load data from the DATA_PATH with the associated IN_FILE, adding a file separator between them. This should result in loading two data frames: cohort.df and labs.df.

# Restore the saved objects from <PICU_LAB_DATA_PATH>/<PICU_LAB_IN_FILE>
load(file.path(Sys.getenv('PICU_LAB_DATA_PATH'), Sys.getenv('PICU_LAB_IN_FILE')))

Set Parameters

We will utilize several sensitivity analyses in this markdown - these should be identical to the sensitivity indicators in the prior markdown 02_Analytic_Accuracy.Rmd. No changes should be made to these parameters - only additions for more sensitivity parameters.

# Maximum gap between collection times (minutes) for two results to be
# treated as "simultaneous" in the primary analysis
primary.cutoff <- 15

# Alternative collection-time gaps (minutes) used for sensitivity analyses
sens.cutoffs <- c(1, 30, 90)

# Hgb cutoff for the primary analysis, followed by the sensitivity cutoffs
primary.hgb.cutoff <- 7
sens.hgb.cutoffs <- c(5, 9)

Join

The below join function is a copy of the function used in 02_Analytic_Accuracy.Rmd. No changes should be made to this version - make changes to the prior version, re-test within that script, and then copy here. After development, this will be moved to a package.

#'
#' @title Create Paired Dataset
#' 
#' @description Creates a dataset of paired, near-simultaneous lab values by
#'     joining the results of two procedures within the same encounter and
#'     keeping the pairs whose collection times differ by less than `time.diff`
#'
#' @details The surrounding report states that this is a copy of the function
#'     in `02_Analytic_Accuracy.Rmd`; keep the two versions in sync.
#'
#'     Steps performed (a count is printed with `cat()` after most of them):
#'       1. Keep rows of `labs.df` for component `CN` whose `NUM_VAL` is not NA
#'          and not 9999999 (presumably a placeholder for non-numeric results -
#'          TODO confirm).
#'       2. Attach `PAT_KEY` and `DEPT` from `cohort.df` by `ENC_KEY`.
#'       3. Inner-join the PN[1] rows (suffix `.x`) to the PN[2] rows
#'          (suffix `.y`) on `ENC_KEY`, `PAT_KEY` and `DEPT`.
#'       4. Compute `COLL_TIME_DIFF_MIN` (x minus y, in minutes) and keep pairs
#'          whose absolute difference is strictly less than `time.diff`.
#'       5. Keep a single row per `ORDER_PROC_KEY.x`, then a single row per
#'          `ORDER_PROC_KEY.y`.
#'       6. If `multi.per.pt` is FALSE, keep only the pair with the earliest
#'          `COLLECTED_DT.x` for each `PAT_KEY`.
#'
#' @param labs.df The labs data frame. The code reads the columns `NUM_VAL`,
#'     `COMP_NAME`, `PROC_NAME`, `ENC_KEY`, `ORDER_PROC_KEY`, `COLLECTED_DT`,
#'     `RESULT_DT` and `AGE_PROC`
#' @param cohort.df The cohort data frame, needed for PAT_KEY and DEPT
#'     (joined on `ENC_KEY`)
#' @param PN A two-element vector of PROC_NAMEs to join; PN[1] becomes the
#'     `.x` side and PN[2] the `.y` side
#' @param time.diff The max time difference (min) between collected times;
#'     the comparison is strict (`<`)
#' @param CN The COMP_NAME to join [Default: 'Hgb']
#' @param multi.per.pt If FALSE, limit to first result per patient, otherwise 
#'     if TRUE [Default], allow all
#'     
#' @returns The resulting joined data frame, one row per retained pair
#'
createPairedDataset <- function (labs.df, cohort.df, PN, time.diff, 
                           CN = 'Hgb', multi.per.pt = T) {
  
  # First we filter to remove the non-numeric rows, then restrict to the
  # requested component
  filter.df <- 
    labs.df %>%
    dplyr::filter(!is.na(NUM_VAL) & NUM_VAL != 9999999.) %>%
    dplyr::filter(COMP_NAME == CN)
  
  cat(sprintf('Number of component numeric rows in input data frame: %d\n',
              nrow(filter.df)))
  
  # Join to get PAT_KEY and DEPT, used in subsequent filtering
  # NOTE(review): assumes cohort.df holds one row per ENC_KEY; duplicates
  # would multiply the lab rows here - confirm
  keyed.df <- 
    dplyr::left_join(
      x = filter.df,
      y = cohort.df %>% 
        dplyr::select(ENC_KEY, PAT_KEY, DEPT),
      by = c('ENC_KEY')
    )
  
  # Now we filter by PN and join to create full data frame. Within an
  # encounter every PN[1] row is combined with every PN[2] row; AGE_PROC is
  # carried from the PN[1] side only.
  joined.df <-
    dplyr::inner_join(
      x = keyed.df %>%
        dplyr::filter(PROC_NAME == PN[1]) %>%
        dplyr::select(ENC_KEY, PAT_KEY, ORDER_PROC_KEY, 
                      DEPT, COLLECTED_DT, RESULT_DT, NUM_VAL, AGE_PROC),
      y = keyed.df %>%
        dplyr::filter(PROC_NAME == PN[2]) %>%
        dplyr::select(ENC_KEY, PAT_KEY, ORDER_PROC_KEY,
                      DEPT, COLLECTED_DT, RESULT_DT, NUM_VAL),
      by = c('ENC_KEY', 'PAT_KEY', 'DEPT'),
      suffix = c('.x', '.y')
    ) 
  
  # Compute the collection-time difference using base R, by column number
  #   [[5]] is PN[1] COLLECTED_DT
  #   [[10]] is PN[2] COLLECTED_DT
  # These positions follow from the select() orders above (8 columns from x,
  # then the 4 non-key columns from y); changing either select() silently
  # changes which columns are subtracted.
  # NOTE(review): assumes COLLECTED_DT is a date-time so the subtraction
  # yields a difftime convertible to minutes - confirm
  joined.df$COLL_TIME_DIFF_MIN <-
    as.numeric(joined.df[[5]] - joined.df[[10]], units = 'mins')
 
  # Apply the cutoff time (strictly less than time.diff, in either direction)
  cutoff.df <- 
    joined.df %>%
    dplyr::filter(abs(COLL_TIME_DIFF_MIN) < time.diff)
  
  cat(sprintf('Number of paired, simultaneous values meeting cutoff: %d\n',
              nrow(cutoff.df)))
    
  # Ensure that each first PROC_NAME order is only used once - meaning that  
  # each ORDER_PROC_KEY.x should be unique
  # NOTE(review): the sort uses the signed COLL_TIME_DIFF_MIN, so first()
  # keeps the most negative difference rather than the smallest absolute
  # difference - confirm this is the intended tie-break
  unique.x.df <- 
    cutoff.df%>%
    dplyr::arrange(ORDER_PROC_KEY.x, COLL_TIME_DIFF_MIN) %>%
    dplyr::group_by(ORDER_PROC_KEY.x) %>%
    dplyr::summarize(
      ORDER_PROC_KEY.y   = first( ORDER_PROC_KEY.y   ),
      DEPT               = first( DEPT               ),
      COLLECTED_DT.x     = first( COLLECTED_DT.x     ),
      RESULT_DT.x        = first( RESULT_DT.x        ),
      NUM_VAL.x          = first( NUM_VAL.x          ),
      COLLECTED_DT.y     = first( COLLECTED_DT.y     ),
      RESULT_DT.y        = first( RESULT_DT.y        ),
      NUM_VAL.y          = first( NUM_VAL.y          ),
      COLL_TIME_DIFF_MIN = first( COLL_TIME_DIFF_MIN ),
      AGE_PROC           = first( AGE_PROC           ),
      ENC_KEY            = first( ENC_KEY            ),
      PAT_KEY            = first( PAT_KEY            )
    ) %>%
    dplyr::ungroup()

  cat(sprintf('Number of non-duplicated first PROC_NAME rows: %d\n',
              nrow(unique.x.df)))
    
  # Similarly, ensure that each second PROC_NAME order is being used just once
  # (i.e., that ORDER_PROC_KEY.y is not duplicated); the same signed-difference
  # ordering applies here
  non.dup.df <-
    unique.x.df %>%
    dplyr::arrange(ORDER_PROC_KEY.y, COLL_TIME_DIFF_MIN) %>%
    dplyr::group_by(ORDER_PROC_KEY.y) %>%
    dplyr::summarize(
      ORDER_PROC_KEY.x   = first( ORDER_PROC_KEY.x   ),
      DEPT               = first( DEPT               ),
      COLLECTED_DT.x     = first( COLLECTED_DT.x     ),
      RESULT_DT.x        = first( RESULT_DT.x        ),
      NUM_VAL.x          = first( NUM_VAL.x          ),
      COLLECTED_DT.y     = first( COLLECTED_DT.y     ),
      RESULT_DT.y        = first( RESULT_DT.y        ),
      NUM_VAL.y          = first( NUM_VAL.y          ),
      COLL_TIME_DIFF_MIN = first( COLL_TIME_DIFF_MIN ),
      AGE_PROC           = first( AGE_PROC           ),      
      ENC_KEY            = first( ENC_KEY            ),
      PAT_KEY            = first( PAT_KEY            )
    ) %>%
    dplyr::ungroup()
  
  cat(sprintf('Number of non-duplicated second PROC_NAME rows: %d\n',
              nrow(non.dup.df)))
    
  # Do we limit by one per patient?
  if (!multi.per.pt) {
    per.pt.df <-
      non.dup.df %>%
      # Sort by PAT_KEY and the first COLLECTED DT
      dplyr::arrange(PAT_KEY, COLLECTED_DT.x) %>%
      # Group by PAT_KEY and add a "LINE" number 
      dplyr::group_by(PAT_KEY) %>%
      dplyr::mutate(
        PAT_LINE = row_number()
      ) %>%
      # Ungroup
      dplyr::ungroup() %>%
      # Filter for lines == 1 only
      dplyr::filter(PAT_LINE == 1) %>%
      dplyr::select(-PAT_LINE)
  } else {
    per.pt.df <- non.dup.df
  }
  
  cat(sprintf('Number of paired, simultaneous values: %d\n',
              nrow(per.pt.df)))
  
  # Sanity check on the de-duplication above
  cat(sprintf('Number of duplicated ORDER_PROC_KEY.x values: %d\n',
              sum(duplicated(per.pt.df$ORDER_PROC_KEY.x))))
  
  return(per.pt.df)
}

First we create the CBC - BG dataset using the primary cutoff value, and include all pairs per patient.

# Primary CBC - BG Hgb pairs: primary time cutoff, all pairs per patient kept
cbc.bg <- createPairedDataset(
  labs.df = labs.df, cohort.df = cohort.df,
  PN = c('CBC', 'BG'),
  time.diff = primary.cutoff,
  CN = 'Hgb',
  multi.per.pt = TRUE
)
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 72997
## Number of non-duplicated first PROC_NAME rows: 67141
## Number of non-duplicated second PROC_NAME rows: 67077
## Number of paired, simultaneous values: 67077
## Number of duplicated ORDER_PROC_KEY.x values: 0

Analyze

In this section we complete the clinical accuracy assessments.

Error Grid

We begin the assessment of clinical accuracy by creating the Error Grid.

#'
#' @title Calculate Error Grid
#' 
#' @description Counts the paired values falling in each region of the Error
#'     Grid, prints the counts, and optionally plots and/or returns the grid
#'
#' @details The regions are polygons in (`NUM_VAL.x`, `NUM_VAL.y`) space:
#'     box A (green), box B (the full 0-25 square), and boxes C and D (red).
#'     Points are allocated with `ptinpoly::pip2d()`; a result `>= 0` is
#'     counted as inside (per the ptinpoly documentation this includes points
#'     lying on the polygon boundary - confirm). Because box B covers the whole
#'     square, the yellow area is reported as B - A - C - D.
#' 
#' @param df The paired samples data frame, calculated above; must contain
#'     the columns `NUM_VAL.x` and `NUM_VAL.y`
#' @param to.plot If TRUE [Default], displays the Error Grid plot
#' @param to.return If TRUE [Default], returns the plot
#'
#' @returns The ggplot object when `to.return` is TRUE, otherwise NULL
#'     (invisibly). The plot is built whenever either flag is TRUE, so the
#'     two flags can be used independently.
#'
calculateErrorGrid <- function (df, to.plot = TRUE, to.return = TRUE) {
  
  #'
  #' Sub-function to define the underlying grid pts and the base plot
  #' 
  makeBaseGrid <- function () {
    
    # Define the points which comprise the Error Grid
    A <- data.frame(
      X = c(0, 6, 6, 10, 25, 25, 9, 9, 5.4, 0),
      Y = c(0, 0, 5.4, 9, 9, 25, 25, 10, 6, 6))
    
    B <- data.frame(
      X = c(0, 25, 25, 0),
      Y = c(0, 0, 25, 25))
    
    C <- data.frame(
      X = c(0, 6, 6, 0),
      Y = c(10, 10, 25, 25))
    
    D <- data.frame(
      X = c(10, 10, 25, 25),
      Y = c(0, 6, 6, 0))
    
    # Generate grid: identity line plus the shaded regions, zoomed to 4-20
    p <- 
      ggplot() + 
      geom_abline(mapping = NULL, data = NULL,
                  slope = 1, intercept = 0, na.rm = FALSE, 
                  show.legend = NA, size = 1) + 
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'green', 
                   fill = 'green', alpha = 0.2, data = A) + 
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'yellow', 
                   fill = 'yellow', alpha = 0.1, data = B) +
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'red', 
                   fill = 'red', alpha = 0.2, data = C) +
      geom_polygon(aes(x = X, y = Y), size = 1.5, color = 'red', 
                   fill = 'red', alpha = 0.2, data = D) + 
      coord_cartesian(ylim = c(4, 20), xlim = c(4, 20)) + 
      scale_fill_distiller(palette = 4, direction = 1) 

    return(list(
      A = A, B = B, C = C, D = D, p = p
    ))
  } # End of sub-function
  
  g <- makeBaseGrid()
  
  Queries <- as.matrix(df %>% dplyr::select(NUM_VAL.x, NUM_VAL.y))

  # Number of query points counted as inside a given polygon
  countInside <- function (poly) {
    sum(ptinpoly::pip2d(Vertices = as.matrix(poly), Queries = Queries) >= 0)
  }
  
  n.total <- nrow(Queries)
  n.A <- countInside(g$A)
  n.B <- countInside(g$B)
  n.C <- countInside(g$C)
  n.D <- countInside(g$D)
  
  # Percentage of all query points
  pct <- function (count) count / n.total * 100.
  
  # First display raw "Box" output
  cat(sprintf(paste0(
    'Counts by Box:\n',
    '\tBox A: %d (%0.1f %%)\n',
    '\tBox B: %d (%0.1f %%)\n',
    '\tBox C: %d (%0.1f %%)\n',
    '\tBox D: %d (%0.1f %%)\n'),
    n.A, pct(n.A),
    n.B, pct(n.B),
    n.C, pct(n.C),
    n.D, pct(n.D)))
  
  # Now display by Green, Yellow, Red
  #   Note that Green = A, Yellow = B - A - C - D, Red = C + D
  n.yellow <- n.B - n.A - n.C - n.D
  n.red <- n.C + n.D
  
  cat(sprintf(paste0(
    'Counts by Area:\n',
    '\tGreen Area: %d (%0.2f %%)\n',
    '\tYellow Area: %d (%0.2f %%)\n',
    '\tRed Area: %d (%0.2f %%)\n'),
    n.A, pct(n.A),
    n.yellow, pct(n.yellow),
    n.red, pct(n.red)
  ))
  
  # Nothing further to do when the plot is neither shown nor returned
  if (!to.plot && !to.return) {
    return(invisible(NULL))
  }
  
  # Build the plot when it is needed for display OR for the return value.
  # (Previously it was only built when both flags were TRUE, so using a
  # single flag failed with "object 'Error_Grid' not found".)
  Error_Grid <- 
    g$p +
    geom_jitter(aes(x = NUM_VAL.x, y = NUM_VAL.y), data = df, 
                width = 0.3, height = 0.3, size = 0.3) + 
    xlab('Reference Lab Value') + 
    ylab('Measured Lab Value') + 
    theme_bw() + 
    theme(panel.grid.minor = element_blank()) + 
    theme(panel.background = element_rect(fill = "transparent", colour = NA),
          plot.background = element_rect(fill = "transparent", colour = NA))
  
  if (to.plot) {
    print(Error_Grid)
  } 
  
  if (to.return) {
    return(Error_Grid)
  }
  
  invisible(NULL)
}

Now we use this function to calculate Error Grid counts and display plot:

# Print the Error Grid counts, show the plot, and keep the plot object
error.grid.cbc.bg <- calculateErrorGrid(df = cbc.bg, to.plot = TRUE, to.return = TRUE)
## Counts by Box:
##  Box A: 63567 (94.8 %)
##  Box B: 67077 (100.0 %)
##  Box C: 42 (0.1 %)
##  Box D: 29 (0.0 %)
## Counts by Area:
##  Green Area: 63567 (94.77 %)
##  Yellow Area: 3439 (5.13 %)
##  Red Area: 71 (0.11 %)

Regression Analysis

To complete regression analysis, we must first gather the other variables from the original labs.df data frame. Then we set a “well-matched” threshold and run the regression model.

To do this properly on the UR system, which utilizes both “blood gas” and “blood gas panel” orders for different components of the blood gas, we need to join on ENC_KEY and by COLLECTED_DT because ORDER_PROC_KEY does NOT generate the full match.

#'
#' @title Gather Covariates
#' 
#' @description Builds a wide data frame with one row per pair and one column
#'     per covariate lab
#' 
#' @details The paired data frame must have unique `ORDER_PROC_KEY.x` values
#'     (as produced by `createPairedDataset()`) and unique
#'     (`COLLECTED_DT.y`, `ENC_KEY`) tuples; the function stops otherwise.
#'     
#'     Covariate results are located by matching the encounter key and the
#'     PN[2] collected time (`COLLECTED_DT.y`) against `labs.df`, rather than
#'     by order key. When several results of one component match a pair, the
#'     one with the earliest `RESULT_DT` is used. Pairs without a match get NA.
#' 
#' @param paired.df The paired data frame; must contain (at least) the columns
#'     `ORDER_PROC_KEY.x`, `COLLECTED_DT.y` and `ENC_KEY`
#' @param labs.df The full labs data frame
#' @param covars A vector of covariate names, matched against `COMP_NAME`
#' 
#' @returns A data frame with `ORDER_PROC_KEY.x` plus one column per covariate
#'
gatherCovariates <- function (paired.df, labs.df, 
                              covars = c('pH', 'Bicarb', 'iCal', 'Gluc', 'Lactate')) {
  
  # Guard: each PN[1] order key may appear only once
  n.keys <- length(paired.df$ORDER_PROC_KEY.x)
  if (length(unique(paired.df$ORDER_PROC_KEY.x)) != n.keys)
    stop('PN[1] Order Proc Keys should be unique')
  
  # Guard: the (PN[2] collected time, encounter) tuple must identify a row
  distinct.tuples <- 
    dplyr::distinct(dplyr::select(paired.df, COLLECTED_DT.y, ENC_KEY))
  if (nrow(paired.df) != nrow(distinct.tuples))
    stop('COLLECTED_DT.y and ENC_KEY tuple are not distinct in paired data frame')
  
  # Drop lab rows without a usable numeric value
  usable.labs <- 
    dplyr::filter(labs.df, !is.na(NUM_VAL) & NUM_VAL != 9999999.)
  
  # Start the result with just the pair identifiers
  result.df <- data.frame(
    ORDER_PROC_KEY.x = paired.df$ORDER_PROC_KEY.x
  )
  
  cat(sprintf('NUmber of unique PN[1] order procedure keys: %d\n',
              nrow(result.df)))

  # All lab results sharing an encounter and collected time with a pair
  matched.labs <-
    dplyr::inner_join(
      x = dplyr::select(paired.df, ORDER_PROC_KEY.x, ENC_KEY, COLLECTED_DT.y),
      y = usable.labs,
      by = c('ENC_KEY', 'COLLECTED_DT.y' = 'COLLECTED_DT')
    )
  
  # Add the covariates one at a time
  for (CN in covars) {
    
    # One value per pair for this component: the earliest-resulted one
    component.df <-
      matched.labs %>%
      dplyr::filter(COMP_NAME == CN) %>%
      dplyr::select(ORDER_PROC_KEY.x, NUM_VAL, RESULT_DT) %>%
      dplyr::arrange(ORDER_PROC_KEY.x, RESULT_DT) %>%
      dplyr::group_by(ORDER_PROC_KEY.x) %>%
      dplyr::summarize(
        LAST_ADD = first(NUM_VAL)
      ) %>%
      dplyr::ungroup()
    
    # Keep every pair accumulated so far, matched or not
    result.df <-
      dplyr::right_join(
        x = component.df,
        y = result.df,
        by = c('ORDER_PROC_KEY.x')
      )
    
    # Give the temporary column its component name
    names(result.df)[names(result.df) == 'LAST_ADD'] <- CN
  }
  
  return(result.df)
}

Here we utilize the gatherCovariates() function to extract covariates for our paired dataset:

# Collect the default covariates for the CBC - BG pairs
covars.df <- gatherCovariates(paired.df = cbc.bg, labs.df = labs.df)
## NUmber of unique PN[1] order procedure keys: 67077

Now we display stats on the covariates, to ensure we have appropriately filled the table and to show the distributions. To do this, we write a function which iterates across the columns:

#'
#' @title Display Covariate Stats
#' 
#' @description Displays statistics on covariates in the data frame
#' 
#' @details For every column after the first (the first is taken to be the
#'     key column), prints `summary()`, prints the count and percentage of
#'     NAs, and plots a histogram of the values lying between the 1st and 99th
#'     percentiles. The histogram is skipped, with a message, for a column
#'     that has no values in that range (e.g. an all-NA column). A data frame
#'     with only a key column produces no output.
#' 
#' @param covars.df The Covariates data frame from `gatherCovariates()` function
#' 
#' @returns NULL, invisibly; called for its printed output and plots
#' 
displayCovariateStats <- function (covars.df) {

  # seq_len(...)[-1] is empty when there is no covariate column, whereas
  # `2 : ncol(covars.df)` would iterate over c(2, 1)
  for (index in seq_len(ncol(covars.df))[-1]) {
    
    col.name <- names(covars.df)[index]
    
    # Kept as `[, index]` so the printed summary keeps its existing layout
    # (a one-column tibble prints in the table style)
    print(summary(covars.df[, index]))
    
    # Plain vector for the calculations below
    col.values <- covars.df[[index]]
    
    cat(sprintf('Count (and %%) of NAs in %s column: %d (%0.2f %%)\n',
                col.name,
                sum(is.na(col.values)),
                sum(is.na(col.values)) / nrow(covars.df) * 100.))
    
    bounds <- quantile(col.values, probs = c(0.01, 0.99), na.rm = TRUE)
    
    # Values within the 1st - 99th percentile; NAs are dropped
    in.range <- !is.na(col.values) & 
      col.values >= bounds[1] & col.values <= bounds[2]
    
    this.filt.df <- data.frame(val = col.values[in.range])
    
    if (nrow(this.filt.df) == 0) {
      cat(sprintf('No non-missing values to plot for %s column\n', col.name))
      next
    }
    
    # At most 40 bins, fewer when there are fewer distinct values
    hist.bins <- min(
      length(unique(this.filt.df$val)),
      40)
    
    p <-
      this.filt.df %>%
      ggplot() +
      geom_histogram(aes(x = val), bins = hist.bins) +
      xlab(paste0(col.name, ' values (1st - 99th percentile)')) +
      ylab('Count') +
      theme_bw()
    
    print(p)
  }
  
  invisible(NULL)
}
displayCovariateStats(covars.df = covars.df)
##     Lactate      
##  Min.   : 0.500  
##  1st Qu.: 1.100  
##  Median : 1.500  
##  Mean   : 2.071  
##  3rd Qu.: 2.200  
##  Max.   :27.200  
##  NA's   :17366   
## Count (and %) of NAs in Lactate column: 17366 (25.89 %)

##       Gluc       
##  Min.   :  13.0  
##  1st Qu.:  97.0  
##  Median : 115.0  
##  Mean   : 127.7  
##  3rd Qu.: 141.0  
##  Max.   :3008.0  
##  NA's   :5903    
## Count (and %) of NAs in Gluc column: 5903 (8.80 %)

##       iCal      
##  Min.   :0.270  
##  1st Qu.:1.110  
##  Median :1.170  
##  Mean   :1.177  
##  3rd Qu.:1.230  
##  Max.   :2.740  
##  NA's   :5841   
## Count (and %) of NAs in iCal column: 5841 (8.71 %)

##      Bicarb     
##  Min.   : 1.80  
##  1st Qu.:23.90  
##  Median :27.30  
##  Mean   :28.08  
##  3rd Qu.:31.60  
##  Max.   :82.60  
##  NA's   :5      
## Count (and %) of NAs in Bicarb column: 5 (0.01 %)

##        pH        
##  Min.   : 6.605  
##  1st Qu.: 7.353  
##  Median : 7.398  
##  Mean   : 7.393  
##  3rd Qu.: 7.438  
##  Max.   :37.600  
##  NA's   :4       
## Count (and %) of NAs in pH column: 4 (0.01 %)

Now we join the covariates to the paired dataframe and add the age, filter and impute. Then we run the logistic regression and display the results:

#'
#' @title Join Impute Regress
#' 
#' @description Join covars and pairs, impute NA values, regress, and report results
#' 
#' @details Steps: (1) left-join the covariates onto the pairs by
#'     `ORDER_PROC_KEY.x`; (2) replace NA covariate values by
#'     `impute.fx(column, na.rm = TRUE)`; (3) flag each pair as `WELL_MATCHED`
#'     using `thresh.list`; (4) fit a binomial `glm` of `WELL_MATCHED` on the
#'     PN[2] value, the covariates, `AGE_PROC` and `DEPT`. pH is multiplied by
#'     10 for the fit, so its coefficient is per 0.1 pH unit. The model
#'     summary, exponentiated coefficients and (optionally) exponentiated
#'     confidence intervals are printed.
#' 
#' @param paired.df A dataframe of paired PN[1] and PN[2] values, created from 
#'     the function `createPairedDataset()`
#' @param covars.df A dataframe of covariates, created from the 
#'     function `gatherCovariates()`; must contain the columns `pH`, `Bicarb`,
#'     `iCal`, `Gluc` and `Lactate`
#' @param thresh.list A list of three-element vectors, where each three-element
#'     vector is of the format c(min, max, thresh). A pair is considered
#'     `WELL_MATCHED` if, for any vector, its mean Hgb (between PN[1] and
#'     PN[2]) satisfies `min <= mean < max` and the absolute difference is
#'     less than `thresh`. The interval is half-open so that adjacent bins
#'     such as c(6, 9, ...) and c(9, 100, ...) cover a mean of exactly 9.
#' @param impute.fx The function for imputing NA values [Default: `median`];
#'     it is called as `impute.fx(x, na.rm = TRUE)`
#' @param ci If TRUE, compute the confidence intervals on the regression results 
#'     [Default: TRUE]
#'     
#' @returns A list with element `reg.model` (the fitted model) and, if `ci`
#'     is TRUE, `ci.reg` (the confidence intervals on the coefficient scale).
#'     The element order is unchanged, so positional access still works.
#'
joinImputeRegress <- function (paired.df, covars.df, thresh.list, 
                                 impute.fx = median, ci = TRUE) {
  
  # Validate the thresholds before doing any work
  for (bounds in thresh.list) {
    if (length(bounds) != 3)
      stop('Each vector within the threshold list must be three elements')
  }
  
  # First we join the paired data frame (as the basis) with the covariates,
  # by the unique ORDER PROC key of PN[1] (should be CBC)
  joined.df <-
    dplyr::left_join(
      x = paired.df, 
      y = covars.df,
      by = c('ORDER_PROC_KEY.x')
    ) %>%
    dplyr::select(NUM_VAL.x, NUM_VAL.y, AGE_PROC, 
                  pH, Bicarb, iCal, Gluc, Lactate, DEPT)
  
  # Replace the NAs of one column by the impute function of that column
  fillMissing <- function (vals) {
    ifelse(is.na(vals), impute.fx(vals, na.rm = TRUE), vals)
  }
  
  impute.df <-
    joined.df %>%
    dplyr::mutate(
      pH      = fillMissing(pH),
      Gluc    = fillMissing(Gluc),
      iCal    = fillMissing(iCal),
      Lactate = fillMissing(Lactate),
      Bicarb  = fillMissing(Bicarb)
    )
  
  # Quantities used for the "WELL_MATCHED" thresholding
  mean.hgb <- (impute.df$NUM_VAL.x + impute.df$NUM_VAL.y) / 2.
  diff.hgb <- abs(impute.df$NUM_VAL.x - impute.df$NUM_VAL.y)
  
  # OR together the bins; the lower bound is inclusive so that a mean lying
  # exactly on the boundary between two adjacent bins is not excluded from both
  well.matched <- rep(FALSE, nrow(impute.df))
  
  for (bounds in thresh.list) {
    well.matched <-
      well.matched | 
      (mean.hgb >= bounds[1] & mean.hgb < bounds[2] & diff.hgb < bounds[3])
  }
  
  # The model data frame drops the PN[1] value and carries the outcome
  thresh.df <-
    impute.df %>%
    dplyr::select(-NUM_VAL.x) %>%
    dplyr::mutate(WELL_MATCHED = well.matched)

  cat(sprintf('Number (%%) of `WELL MATCHED`: %d (%0.2f %%)\n',
              sum(thresh.df$WELL_MATCHED),
              sum(thresh.df$WELL_MATCHED) / nrow(thresh.df) * 100.))
  
  # Run the logistic regression (pH scaled so the effect is per 0.1 unit)
  reg.model <- glm(
    WELL_MATCHED ~ NUM_VAL.y +
      pH + 
      Gluc +
      Bicarb +
      iCal +
      Lactate +
      AGE_PROC + 
      DEPT,
    family = 'binomial',
    data = thresh.df %>%
      dplyr::mutate(pH = pH * 10.)
  )
  
  print(summary(reg.model))
  
  # Coefficients on the odds-ratio scale
  print(exp(reg.model$coefficients))
  
  if (!ci) {
    return(list(
      reg.model = reg.model
    ))
  }
  
  ci.reg <- confint(reg.model)
  
  print(exp(ci.reg))
  
  return(list(
    reg.model = reg.model,
    ci.reg = ci.reg
  ))
}

Now run the joining, imputing, and regression:

# Each entry of the threshold list is a three-element vector holding, in order:
# min, max, threshold
# Read as: for a mean Hgb between min and max, the difference between the
# paired values must be less than threshold for the pair to be `WELL_MATCHED`
thresh.list <- list(
  c(-100, 6, 1.5),
  c(6, 9, 1.0),
  c(9, 100, 1.5)
)

regress.res <- joinImputeRegress(
  paired.df = cbc.bg,
  covars.df = covars.df,
  thresh.list = thresh.list,
  impute.fx = median,
  ci = TRUE
)
## Number (%) of `WELL MATCHED`: 62131 (92.63 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.3293   0.2559   0.3995   0.4421   1.1141  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.526e+00  5.240e-01   6.729 1.71e-11 ***
## NUM_VAL.y    3.478e-02  7.502e-03   4.637 3.54e-06 ***
## pH          -7.233e-03  6.342e-03  -1.140    0.254    
## Gluc        -5.059e-04  2.438e-04  -2.075    0.038 *  
## Bicarb       2.274e-03  2.332e-03   0.975    0.329    
## iCal        -1.141e-01  1.494e-01  -0.764    0.445    
## Lactate      7.891e-02  1.095e-02   7.208 5.67e-13 ***
## AGE_PROC    -1.003e-04  7.173e-06 -13.988  < 2e-16 ***
## DEPTPICU    -8.797e-01  4.335e-02 -20.295  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 35309  on 67076  degrees of freedom
## Residual deviance: 34047  on 67068  degrees of freedom
## AIC: 34065
## 
## Number of Fisher Scoring iterations: 6
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
##  33.9810602   1.0353945   0.9927930   0.9994943   1.0022771   0.8921726 
##     Lactate    AGE_PROC    DEPTPICU 
##   1.0821103   0.9998997   0.4149064
## Waiting for profiling to be done...
##                 2.5 %      97.5 %
## (Intercept) 9.5488867 111.6783127
## NUM_VAL.y   1.0203131   1.0507622
## pH          0.9780113   1.0089308
## Gluc        0.9990279   0.9999864
## Bicarb      0.9977260   1.0068891
## iCal        0.6666270   1.1972400
## Lactate     1.0597008   1.1061743
## AGE_PROC    0.9998856   0.9999137
## DEPTPICU    0.3809174   0.4514699

Cohen’s Kappa

Cohen’s kappa coefficient assesses the agreement between two “raters” (equivalently, how well two classifications match) beyond what would be expected by chance. In our case, we would like to understand the agreement between the hemoglobins of two PROC_NAMEs (e.g. CBC and BG) when a simple threshold is applied to each.

We hypothesize that there will be agreement at the tails but that there will be some disagreement in the local vicinity of the threshold, which will likely drive down the Kappa coefficient.

First, here is the function to compute the Cohen’s Kappa:

#'
#' @title Calculate Cohen Kappa
#'
#' @description Calculates the Cohen Kappa statistic for two vectors of CN values
#'
#' @details
#'     Recall that Cohen's Kappa is defined as:
#'
#'       K = (P_observed - P_expected) / (1 - P_expected)
#'
#'       Where:
#'         P_expected = P_pos + P_neg with
#'           P_pos = P_raterA+ x P_raterB+ and P_neg = P_raterA- x P_raterB-
#'
#'     In our case, Rater A will be positive when values.x (from PN[1]) are
#'     less than the cutoff, suggesting the need for a transfusion. Similarly, 
#'     Rater B will be positive when values.y (from PN[2]) are less than the
#'     cutoff.  
#'
#'     A "Positive" response (meaning we have to transfuse) is when 
#'     Hgb < cutoff, and a "Negative" response (meaning we do not transfuse)
#'     is when Hgb >= cutoff.
#'
#'     NA values are not removed, so they propagate to the result. When both
#'     raters give the same single response for every pair, P_expected is 1
#'     and the result is NaN.
#'
#' @param values.x A column vector of Hgb values from PN[1]
#' @param values.y A column vector of Hgb values from PN[2] (with length 
#'     same as values.x)
#' @param cutoff A scalar representing the Hgb cutoff value; it may be
#'     non-integer (e.g. 7.5)
#' @param to.print If TRUE, prints results in addition to returning [Default]
#'
#' @return The Cohen Kappa for these two vectors at the cutoff given
#'
calculateCohenKappa <- function (values.x, values.y, 
                                 cutoff = 7.0, to.print = TRUE) {

  # Verify that the lengths of the two vectors of PN values are identical
  if (length(values.x) != length(values.y))
    stop('Error: vectors for X and Y must be of equal lengths')

  n.pairs <- length(values.x)

  if (to.print) {
    cat(sprintf('Pre-Range Check Length: %d\n', n.pairs))
    
    # %g rather than %d: sprintf('%d', ...) stops with an error for a
    # non-integer double such as 7.5, while %g still prints 7.0 as "7"
    cat(sprintf('Cutoff value used: %g\n', cutoff))
  }

  # Classify each value once: TRUE means "positive" (Hgb below the cutoff)
  x.pos <- values.x < cutoff
  y.pos <- values.y < cutoff

  # Consider a 2x2 matrix with two "Raters" (or two vectors):
  # 
  #                      values.x 
  #                   Yes   |     No
  #                -------------------
  #            Yes |   A    |    B   |
  #  values.y  ----|--------|--------|
  #            No  |   C    |    D   |
  #                -------------------
  #
  #  where: len = A + B + C + D
  #
  P.x.pos <- sum(x.pos) / n.pairs  # Equiv to 'A + C' / len
  P.x.neg <- sum(!x.pos) / n.pairs # Equiv to 'B + D' / len

  P.y.pos <- sum(y.pos) / n.pairs  # Equiv to 'A + B' / len
  P.y.neg <- sum(!y.pos) / n.pairs # Equiv to 'C + D' / len

  if (to.print) {
    cat(sprintf('X :: Pos: %0.2f\tNeg: %0.2f\n', P.x.pos, P.x.neg))
    cat(sprintf('Y :: Pos: %0.2f\tNeg: %0.2f\n', P.y.pos, P.y.neg))
  }

  # Chance agreement on "positive" and on "negative"
  P.pos <- P.x.pos * P.y.pos
  P.neg <- P.x.neg * P.y.neg

  if (to.print)
    cat(sprintf('\tP.pos: %0.4f\n\tP.neg: %0.4f\n', P.pos, P.neg))

  P.exp <- P.pos + P.neg

  # Observed agreement: pairs where both values are < cutoff or both are
  # >= cutoff, divided by the total (cells A and D of the table above)
  P.obs <- ( sum(x.pos & y.pos) + sum(!x.pos & !y.pos) ) / n.pairs

  if (to.print)
    cat(sprintf('\tP.obs: %0.2f\n\tP.exp: %0.2f\n', P.obs, P.exp))

  kappa <- (P.obs - P.exp) / (1. - P.exp)

  if (to.print)
    cat(sprintf('Kappa: %0.2f\n', kappa))

  return(kappa)
}

Now we use this function to calculate the Cohen’s kappa at a given primary threshold:

# Kappa between the PN[1] and PN[2] Hgb values at the primary Hgb cutoff
calculateCohenKappa(cbc.bg$NUM_VAL.x, cbc.bg$NUM_VAL.y,
                    cutoff = primary.hgb.cutoff, to.print = TRUE)
## Pre-Range Check Length: 67077
## Cutoff value used: 7
## X :: Pos: 0.02   Neg: 0.98
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0001
##  P.neg: 0.9747
##  P.obs: 0.99
##  P.exp: 0.97
## Kappa: 0.51
## [1] 0.511274

And run again across the secondary thresholds:

# Repeat the kappa calculation at each sensitivity Hgb cutoff
for (thresh in sens.hgb.cutoffs) {
  calculateCohenKappa(cbc.bg$NUM_VAL.x, cbc.bg$NUM_VAL.y,
                      cutoff = thresh, to.print = TRUE)
}
## Pre-Range Check Length: 67077
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.00   Neg: 1.00
##  P.pos: 0.0000
##  P.neg: 0.9983
##  P.obs: 1.00
##  P.exp: 1.00
## Kappa: 0.39
## Pre-Range Check Length: 67077
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.12   Neg: 0.88
##  P.pos: 0.0250
##  P.neg: 0.7000
##  P.obs: 0.91
##  P.exp: 0.72
## Kappa: 0.67
# Remove the loop variable left behind by the sensitivity loop
rm(thresh)

Transfusion Tests

In this section, we ask the question, “If the BG Hgb value is greater than X, what is the likelihood that the CBC Hgb is less than Y?” for an appropriate transfusion threshold Y.

Similarly, we can ask the question, “If the BG Hgb value is less than X, what is the likelihood that the CBC Hgb is greater than Y?” for the same set of transfusion thresholds.

First we create the function to calculate the 2x2 matrix for this test. The “gold standard” is the PN[1] value, most often CBC. The “test” is the PN[2] value, either BG or iSTAT. In words:

  • The TP cell includes those where both the CBC and BG are below their cutoff values
  • The FP cell includes those where the BG is below the cutoff, but the CBC is not (not actually anemic)
  • The FN cell includes those where the BG is above the cutoff, but the CBC is below (actually anemic)
  • The TN cell includes those where both BG and CBC are above their cutoff

In this way, the “sensitivity” is the percent of actually anemic patients identified correctly by the test. The PPV is the percent of patients identified by the test as anemic who are actually anemic (and is dependent on the incidence of anemia in our population).

#'
#' @title Transfusion Confusion Matrix
#' 
#' @description Creates a 2x2 confusion matrix for a pair of cutoffs, where a
#'     value strictly below its cutoff is "positive" (below the transfusion
#'     threshold), and derives sensitivity, specificity, PPV, NPV, the false
#'     omission rate (1 - NPV) and its reciprocal (labelled NNM).
#' 
#' @param value.x The value of PN[1] elements (typically CBC)
#' @param value.y The value of PN[2] elements (typically BG or iSTAT)
#' @param cutoffs A two-element vector which specifies the cutoffs for PN[1] 
#'     (which is typically the gold standard CBC) and for PN[2] (typically the
#'     "test" BG or iStat)
#' @param to.print If TRUE, prints results [Default]
#' @param to.return If TRUE, returns results [Default]
#'
#' @return If `to.return` is TRUE, a list with elements `cutoffs`, `TP`, `FP`,
#'     `TN`, `FN`, `sens`, `spec`, `ppv`, `npv`, `falseOR` (1 - NPV) and `nnm`
#'     (1 / falseOR); otherwise NULL (invisibly). Ratios with a zero 
#'     denominator are NaN (e.g. `ppv` when no test value is below its cutoff).
#'
transfusionConfusionMatrix <- function (value.x, value.y,
                                  cutoffs = c(7., 7.),
                                  to.print = TRUE,
                                  to.return = TRUE) {

  # Are the lengths equal
  stopifnot(length(value.x) == length(value.y))

  # Is `cutoffs` a two-element vector?
  stopifnot(length(cutoffs) == 2)

  # A missing value cannot be assigned to any cell; fail here with a clear
  # message rather than at the cell-total check further down
  stopifnot(!anyNA(value.x), !anyNA(value.y))

  n.rows <- length(value.x)

  if (to.print)
    cat(sprintf('Total number of input rows: %d\n', n.rows))

  # 2x2 Standard Table:
  #
  #               Gold Standard (PN[1])
  #                      value.x
  #                 Pos     |   Neg
  #               ----------------------
  #           Pos |   TP    |     FP   |
  # Test (PN[2])  |---------|----------|
  #  value.y  Neg |   FN    |     TN   |
  #               ----------------------
  #
  # In the default case, we consider "POS" to reflect the true need for a 
  # transfusion, meaning the value was < the cutoff

  # Classify each element once; with no NAs present, "not positive" is
  # exactly ">= cutoff"
  gold.pos <- value.x < cutoffs[1]
  test.pos <- value.y < cutoffs[2]

  if (to.print)
    cat(sprintf('Gold Standard:\n\tPositive: %d (%0.2f %%)\n\tNegative: %d (%0.2f %%)\n',
                sum(gold.pos),
                sum(gold.pos) / n.rows * 100.,
                sum(!gold.pos),
                sum(!gold.pos) / n.rows * 100.))

  # Gold standard - positive, Test - positive
  TP <- sum(gold.pos & test.pos)

  # Gold standard - negative, Test - positive
  FP <- sum(!gold.pos & test.pos)

  # Gold standard - positive, Test - negative
  FN <- sum(gold.pos & !test.pos)

  # Gold standard - negative, Test - negative
  TN <- sum(!gold.pos & !test.pos)

  # Dummy check - do these all add up to total length
  stopifnot(TP + FP + TN + FN == n.rows)

  sens <- TP / (TP + FN)
  spec <- TN / (TN + FP)
  ppv <- TP / (TP + FP)
  npv <- TN / (FN + TN)

  # False omission rate: fraction of test-negative results that are actually
  # positive by the gold standard; NNM is its reciprocal
  false.or <- 1. - npv
  nnm <- 1. / false.or

  if (to.print)
    cat(sprintf(paste0(
      'Cutoffs: PN[1]: %0.1f\tPN[2]: %0.1f\n',
      'TP: %d (%0.4f %%)\t',
      'FP: %d (%0.4f %%)\n',
      'FN: %d (%0.4f %%)\t',
      'TN: %d (%0.4f %%)\n',
      'Sens: %0.4f\n',
      'Spec: %0.4f\n',
      'PPV: %0.4f\n',
      'NPV: %0.4f\n',
      'FOR (1-NPV): %0.4f\n',
      'NNM (1/FOR): %0.4f\n\n'),
      cutoffs[1], cutoffs[2],
      TP, TP / n.rows * 100.,
      FP, FP / n.rows * 100.,
      FN, FN / n.rows * 100.,
      TN, TN / n.rows * 100.,
      sens, spec, ppv, npv, false.or, nnm
    ))

  if (to.return) {
    return(list(
      cutoffs = cutoffs,
      TP = TP, FP = FP, TN = TN, FN = FN,
      sens = sens, spec = spec, ppv = ppv, npv = npv,
      falseOR = false.or, 
      nnm = nnm
    ))
  }

  invisible(NULL)
}

We can run this across a few standard values:

# One confusion matrix per candidate PN[2] cutoff, with the PN[1] cutoff held
# at the primary Hgb cutoff. Each entry is itself a list (`res`, `cutoff`).
mat.across.cutoffs <- list()

for (pn2.cutoff in c(7.0, 7.5, 8.0, 8.5, 9.0)) {

  res <- transfusionConfusionMatrix(
    cbc.bg$NUM_VAL.x,
    cbc.bg$NUM_VAL.y,
    cutoffs = c(primary.hgb.cutoff, pn2.cutoff),
    to.print = TRUE,
    to.return = TRUE
  )

  # Assigning a list with `[[<-` one past the end appends it as a single 
  # (nested) element
  mat.across.cutoffs[[length(mat.across.cutoffs) + 1]] <-
    list(res = res, cutoff = pn2.cutoff)
}
## Total number of input rows: 67077
## Gold Standard:
##  Positive: 1159 (1.73 %)
##  Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 441 (0.6575 %)   FP: 107 (0.1595 %)
## FN: 718 (1.0704 %)   TN: 65811 (98.1126 %)
## Sens: 0.3805
## Spec: 0.9984
## PPV: 0.8047
## NPV: 0.9892
## FOR (1-NPV): 0.0108
## NNM (1/FOR): 92.6588
## 
## Total number of input rows: 67077
## Gold Standard:
##  Positive: 1159 (1.73 %)
##  Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 783 (1.1673 %)   FP: 519 (0.7737 %)
## FN: 376 (0.5605 %)   TN: 65399 (97.4984 %)
## Sens: 0.6756
## Spec: 0.9921
## PPV: 0.6014
## NPV: 0.9943
## FOR (1-NPV): 0.0057
## NNM (1/FOR): 174.9335
## 
## Total number of input rows: 67077
## Gold Standard:
##  Positive: 1159 (1.73 %)
##  Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 925 (1.3790 %)   FP: 1769 (2.6373 %)
## FN: 234 (0.3489 %)   TN: 64149 (95.6349 %)
## Sens: 0.7981
## Spec: 0.9732
## PPV: 0.3434
## NPV: 0.9964
## FOR (1-NPV): 0.0036
## NNM (1/FOR): 275.1410
## 
## Total number of input rows: 67077
## Gold Standard:
##  Positive: 1159 (1.73 %)
##  Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 993 (1.4804 %)   FP: 4032 (6.0110 %)
## FN: 166 (0.2475 %)   TN: 61886 (92.2611 %)
## Sens: 0.8568
## Spec: 0.9388
## PPV: 0.1976
## NPV: 0.9973
## FOR (1-NPV): 0.0027
## NNM (1/FOR): 373.8072
## 
## Total number of input rows: 67077
## Gold Standard:
##  Positive: 1159 (1.73 %)
##  Negative: 65918 (98.27 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 1031 (1.5370 %)  FP: 7332 (10.9307 %)
## FN: 128 (0.1908 %)   TN: 58586 (87.3414 %)
## Sens: 0.8896
## Spec: 0.8888
## PPV: 0.1233
## NPV: 0.9978
## FOR (1-NPV): 0.0022
## NNM (1/FOR): 458.7031
rm(pn2.cutoff)

The above function can be run across a range of cutoff values to generate both an ROC and a P-R curve, which give information about the “performance” of the test - either BG or iStat. We can calculate the AUROC and the optimal test thresholds (as the points closest to 0,1).

#'
#' @title Calculate Threshold ROC
#' 
#' @description Calculates an ROC and P-R based on Transfusion "Test"  
#' 
#' @details This function makes use of the above `transfusionConfusionMatrix` 
#'     function, which returns a sensitivity and specificity at a given pair
#'     of thresholds - the CBC threshold (PN[1]) and the PN[2] threshold
#'     (either BG or ISTAT). 
#'     
#'     By generating confusion matrices across a range of test PN[2] 
#'     thresholds (default from 0 g/dL to 25 g/dL, the full range of Hgb), at
#'     both "low" and "high" CBC Hgb thresholds (5 g/dL and 7 g/dL), we can 
#'     generate ROC curves as well as P-R curves for these two conditions
#'     (low, high). These curves represent the ability of the "test" values 
#'     (either BG or iStat) to discriminate the "true" condition of anemia
#'     as defined by a given threshold (low, 5 or high, 7). 
#'     
#'     We calculate the AUROC using trapezoidal (numeric) integration: each
#'     step in FPR between successive thresholds is multiplied by the mean of
#'     the TPR at its two ends. We can also identify the "optimal" threshold 
#'     to use to maximize sensitivity and specificity by minimizing the 
#'     Euclidean distance to the point (0,1) on the ROC curve. 
#'     
#'     Similarly, on the P-R curve, we can visualize the tradeoff between 
#'     precision (PPV) and recall (sensitivity). In this situation, 
#'     precision refers to the % of test values below a threshold which
#'     represent actual anemia (or actual TPs), and is dependent on the 
#'     incidence of anemia in the population. Recall (sensitivity) represents
#'     the % of actually anemic patients which are identified by the "test" Hgb.
#'     Thresholds at which no test value is positive have an undefined 
#'     precision (0 / 0) and are left off the P-R plot.
#'
#' @param paired.df The paired data frame containing `NUM_VAL.x` (PN[1]) and 
#'     `NUM_VAL.y` (PN[2]) columns
#' @param to.print If TRUE, prints results [Default]
#' @param to.return If TRUE, returns results as a list of elements [Default]
#' @param cutoff.minmax A two-element vector of the minimum and maximum Hgb 
#'     values used to generate the full cutoff sequence [Default: 0., 25.]
#' @param cutoff.by The difference between successive values in the cutoff seq
#' 
#' @return If `to.return` is TRUE, a list with elements `auroc` (one row per 
#'     CBC cutoff), `opt.cutoff` (one row per CBC cutoff), `p.roc` and `p.pr` 
#'     (ggplot objects); otherwise NULL (invisibly).
#' 
calculateThresholdROC <- function (paired.df, 
                                   to.print = TRUE, to.return = TRUE,
                                   cutoff.minmax = c(0., 25.),
                                   cutoff.by = 0.1) {

  # Establish the sequence for iterating through the threshold calculation
  cutoff.range <- seq(
    from = cutoff.minmax[1], 
    to = cutoff.minmax[2], 
    by = cutoff.by
  )

  # The "gold standard" CBC (or PN[1]) cutoffs, keyed by their display label
  cbc.cutoffs <- c('High (7.0 g/dL)' = 7., 'Low (5.0 g/dL)' = 5.)

  # Every (CBC cutoff, test cutoff) pair. `expand.grid` varies its first 
  # column fastest, so rows run High, Low, High, Low, ... in increasing order 
  # of the test cutoff.
  grid <- expand.grid(
    cbc.index = seq_along(cbc.cutoffs),
    bg.cutoff = cutoff.range,
    KEEP.OUT.ATTRS = FALSE
  )

  # One confusion matrix per pair; keep only the three rates we need. This
  # yields a 3 x nrow(grid) matrix instead of growing a data frame row by row.
  metrics <- vapply(
    seq_len(nrow(grid)),
    function (row) {
      res <- transfusionConfusionMatrix(
        value.x = paired.df$NUM_VAL.x,
        value.y = paired.df$NUM_VAL.y,
        cutoffs = c(cbc.cutoffs[[grid$cbc.index[row]]], grid$bg.cutoff[row]),
        to.print = FALSE,
        to.return = TRUE
      )
      c(sens = res$sens, spec = res$spec, ppv = res$ppv)
    },
    FUN.VALUE = c(sens = 0., spec = 0., ppv = 0.)
  )

  roc.df <- data.frame(
    tpr = metrics['sens', ],
    fpr = 1.0 - metrics['spec', ],
    precision = metrics['ppv', ],
    recall = metrics['sens', ],
    cbc.cutoff = names(cbc.cutoffs)[grid$cbc.index],
    bg.cutoff = grid$bg.cutoff
  )

  # Trapezoidal integration to determine AUROC values: width of each FPR step
  # times the mean TPR across that step. The last row of each group has no 
  # successor (NA step) and is dropped.
  auroc <-
    roc.df %>%
    dplyr::group_by(cbc.cutoff) %>%
    dplyr::mutate(
      diff.fpr = dplyr::lead(fpr) - fpr,
      mean.tpr = (dplyr::lead(tpr) + tpr) / 2.
    ) %>%
    dplyr::filter(!is.na(diff.fpr)) %>%
    dplyr::mutate(
      mult = diff.fpr * mean.tpr
    ) %>% 
    dplyr::summarize(
      AUROC = sum(mult)
    )

  # Display results of trapezoidal integration
  if (to.print) {
    print(
      knitr::kable(
        auroc,
        col.names = c('CBC Cutoff', 'AUROC'),
        digits = c(0,3)
      ) %>%
        kableExtra::kable_paper("hover")
    )
  }

  # Identify the threshold closest to the point (0,1) by Euclidean distance;
  # after sorting by distance, the first row of each group is the optimum
  opt.cutoff <-
    roc.df %>%
    dplyr::mutate(
      dist = sqrt( (1. - tpr) ^ 2 + (fpr) ^ 2 )
    ) %>%
    dplyr::arrange(cbc.cutoff, dist) %>%
    dplyr::group_by(cbc.cutoff) %>%
    dplyr::summarize(
      DIST = dplyr::first(dist),
      CUTOFF = dplyr::first(bg.cutoff),
      SENS = dplyr::first(tpr),
      SPEC = 1. - dplyr::first(fpr)
    )

  if (to.print) {
    print(
      knitr::kable(
        opt.cutoff,
        col.names = c('CBC Cutoff', 'Distance', 'Cutoff', 'Sens', 'Spec'),
        digits = c(0, 3, 1, 3, 3)
      ) %>%
        kableExtra::kable_paper("hover")
    )
  }

  # Plot ROC curve (dashed diagonal = line of no discrimination)
  p.roc <-
    roc.df %>%
    ggplot(aes(x = fpr, y = tpr, color = cbc.cutoff)) +
    geom_point(size = 2) + 
    geom_line(size = 1.2) +
    annotate('segment', x = 0, xend = 1, y = 0, yend = 1, color = '#666666', linetype = 'dashed') +
    xlab('False positive rate (1 - spec)') +
    ylab('True positive rate (sens)') +
    labs(color = 'CBC Cutoff') +
    theme_bw() +
    theme(legend.position = c(.6,.3))

  if (to.print)
    print(p.roc)

  # Plot Precision Recall curve. Rows with undefined precision / recall are 
  # dropped up front; ggplot would discard them anyway, but with a warning.
  p.pr <-
    roc.df %>%
    dplyr::filter(!is.na(precision) & !is.na(recall)) %>%
    ggplot(aes(x = recall, y = precision, color = cbc.cutoff)) +
    geom_point(size = 2) + 
    geom_line(size = 1.2) +
    xlim(0,1) + ylim(0,1) +
    xlab('Recall (sens)') +
    ylab('Precision (ppv)') +
    labs(color = 'CBC Cutoff') +
    theme_bw() +
    theme(legend.position = c(.8,.9))

  if (to.print)
    print(p.pr)

  # `roc.df` itself is deliberately not part of the returned list
  if (to.return)
    return(list(
      auroc = auroc,
      opt.cutoff = opt.cutoff,
      p.roc = p.roc, 
      p.pr = p.pr
    ))

  invisible(NULL)
}

We do this for a standard range and save / print the results:

# ROC / P-R analysis for the CBC vs BG pairs, stepping the test cutoff in
# increments of 0.01 over the default cutoff range
cbc.bg.thresh.roc <-
  calculateThresholdROC(
    cbc.bg,
    cutoff.by = 0.01,
    to.print = TRUE,
    to.return = TRUE
  )
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.945 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.811 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.151 </td>
##    <td style="text-align:right;"> 8.7 </td>
##    <td style="text-align:right;"> 0.878 </td>
##    <td style="text-align:right;"> 0.911 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.335 </td>
##    <td style="text-align:right;"> 8.8 </td>
##    <td style="text-align:right;"> 0.685 </td>
##    <td style="text-align:right;"> 0.887 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).

Save Analyses

Now we save out all of the plots and calculations that we have completed:

# Persist the primary CBC vs BG results to
#   <DATA_PATH>/<SITE_NAME>_pri_cbc_bg_clinical_<run.date>.rData
# Objects are named as strings via `list =`. The paired data (cbc.bg) and 
# covariates (covars.df) are intentionally not saved.
save(
  list = c(
    'primary.cutoff',
    'error.grid.cbc.bg',
    'thresh.list', 'regress.res', 'mat.across.cutoffs',
    'primary.hgb.cutoff', 'cbc.bg.thresh.roc'
  ),
  file = file.path(
    Sys.getenv('PICU_LAB_DATA_PATH'),
    paste0(
      Sys.getenv('PICU_LAB_SITE_NAME'),
      '_pri_cbc_bg_clinical_',
      run.date, '.rData'
    )
  )
)

CBC vs iSTAT

Here we repeat the clinical accuracy analyses for the CBC vs POC (iStat) pairs. To do this, we first define a function that runs all of the above steps - similar to the approach taken in 02_Analytic_Accuracy.Rmd, except that here the function is named runAllClinical().

#'
#' @title Run All Clinical
#' 
#' @description Runs through all clinical accuracy tasks, for sensitivity analysis
#' 
#' @details In order: builds the paired dataset, computes the error grid, 
#'     gathers and summarizes covariates, runs the "well-matched" regression,
#'     prints Cohen's Kappa at the primary and sensitivity Hgb cutoffs, prints 
#'     transfusion confusion matrices at a fixed set of PN[2] cutoffs, and 
#'     computes the ROC / P-R curves. Relies on the helper functions defined
#'     earlier in this document.
#' 
#' @param labs.df The original labs data frame
#' @param cohort.df The original cohort data frame
#' @param compare.PN The comparison PROC name (e.g. either `BG` or `ISTAT`)
#' @param time.diff The cutoff time difference (in minutes) for determining 
#'     whether labs are "simultaneous"
#' @param multi.per.pt If TRUE, allows all results from patients; 
#'     If FALSE, only the first (chronological) result from a patient is included
#' @param primary.hgb.cutoff The primary Hgb cutoff to use for Cohen's Kappa 
#' @param sens.hgb.cutoffs The secondary Hgb cutoffs for sensitivity analysis
#' @param run.date A string representation of date for saving (format: %Y-%m-%d)
#' @param save.fn The file name (which will be concatenated with SITE and run.date), 
#'     or NA [Default] if we do not wish to save any results to a file
#'
#' @return Invisibly, a list with elements `error.grid`, `regress.res`, 
#'     `mat.across.cutoffs` and `thresh.roc`. When `save.fn` is not NA, the 
#'     same objects (plus `primary.cutoff`, `thresh.list` and 
#'     `primary.hgb.cutoff`) are also written to an .rData file under 
#'     `PICU_LAB_DATA_PATH`.
#'
runAllClinical <- function (labs.df, cohort.df, compare.PN,
                            time.diff, multi.per.pt, primary.hgb.cutoff,
                            sens.hgb.cutoffs, run.date, save.fn = NA) {

  # Generate the paired dataset
  paired.df <- createPairedDataset(
    labs.df = labs.df, 
    cohort.df = cohort.df,
    PN = c('CBC', compare.PN), 
    CN = 'Hgb',
    time.diff = time.diff,
    multi.per.pt = multi.per.pt
  )

  # Calculate and display the error grid
  error.grid <- calculateErrorGrid(
    df = paired.df,
    to.plot = TRUE,
    to.return = TRUE
  )

  # Gather covariates for this paired set
  covars.df <- gatherCovariates(paired.df, labs.df)

  # Display summaries of those covars
  displayCovariateStats(covars.df)

  # Threshold lists are a list of three-element vectors, with the three elements
  # corresponding to: min, max, threshold
  # This can be read as, between the min and max, the mean diff must be less than 
  # the threshold, otherwise it is not `WELL_MATCHED`
  thresh.list <-
    list(
      c(-100, 6, 1.5),
      c(6, 9, 1.0),
      c(9, 100, 1.5)
    )

  # Join, impute, and run regression on these pairs
  regress.res <-
    joinImputeRegress(
      paired.df = paired.df,
      covars.df = covars.df,
      thresh.list = thresh.list,
      impute.fx = median,
      ci = TRUE
    )

  # Calculate the Cohen's Kappa, first at the primary and then at each of the 
  # sensitivity hgb cutoffs (results are printed, not kept)
  for (hgb.cutoff in c(primary.hgb.cutoff, sens.hgb.cutoffs)) {

    calculateCohenKappa(
      values.x = paired.df$NUM_VAL.x,
      values.y = paired.df$NUM_VAL.y,
      cutoff = hgb.cutoff,
      to.print = TRUE
    )

  }

  # Compute Transfusion Confusion Matrix at a range of PN[2] cutoff values;
  # each element is a list holding the result and the cutoff that produced it
  mat.across.cutoffs <-
    lapply(
      c(7.0, 7.5, 8.0, 8.5, 9.0),
      function (pn2.cutoff) {
        list(
          res = transfusionConfusionMatrix(
            value.x = paired.df$NUM_VAL.x,
            value.y = paired.df$NUM_VAL.y,
            cutoffs = c(primary.hgb.cutoff, pn2.cutoff),
            to.print = TRUE,
            to.return = TRUE
          ),
          cutoff = pn2.cutoff
        )
      }
    )

  # And calculate the ROC and P-R curves 
  thresh.roc <- calculateThresholdROC(
    paired.df = paired.df,
    to.print = TRUE,
    to.return = TRUE,
    cutoff.by = 0.01)

  # Save out?
  if (! any(is.na(save.fn)) ) {

    # Record the time cutoff that actually produced these results. Without 
    # this local, `save()` would silently pick up whatever `primary.cutoff` 
    # exists in the calling / global environment (or fail if there is none).
    primary.cutoff <- time.diff

    save(
      file = file.path(
        Sys.getenv('PICU_LAB_DATA_PATH'),
        paste0(
          Sys.getenv('PICU_LAB_SITE_NAME'),
          '_', save.fn, '_',
          run.date, '.rData'
        )
      ),
      primary.cutoff,
      #paired.df, covars.df, 
      error.grid,
      thresh.list, regress.res, mat.across.cutoffs,
      primary.hgb.cutoff, thresh.roc
    )
  }

  # Hand the computed results back without auto-printing them
  invisible(list(
    error.grid = error.grid,
    regress.res = regress.res,
    mat.across.cutoffs = mat.across.cutoffs,
    thresh.roc = thresh.roc
  ))
}

Now we run for the POC values using the primary cutoff, allowing all results per patient (if the PROC exists):

# Only sites whose labs include the ISTAT procedure run this comparison
if ('ISTAT' %in% labs.df$PROC_NAME) {

  runAllClinical(
    labs.df = labs.df,
    cohort.df = cohort.df,
    compare.PN = 'ISTAT',
    time.diff = primary.cutoff,
    multi.per.pt = TRUE,
    primary.hgb.cutoff = primary.hgb.cutoff,
    sens.hgb.cutoffs = sens.hgb.cutoffs,
    run.date = run.date,
    save.fn = 'pri_cbc_istat_clinical'
  )

}
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 3802
## Number of non-duplicated first PROC_NAME rows: 3581
## Number of non-duplicated second PROC_NAME rows: 3575
## Number of paired, simultaneous values: 3575
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 3163 (88.5 %)
##  Box B: 3575 (100.0 %)
##  Box C: 3 (0.1 %)
##  Box D: 7 (0.2 %)
## Counts by Area:
##  Green Area: 3163 (88.48 %)
##  Yellow Area: 402 (11.24 %)
##  Red Area: 10 (0.28 %)

## NUmber of unique PN[1] order procedure keys: 3575
##     Lactate      
##  Min.   : 0.600  
##  1st Qu.: 1.200  
##  Median : 2.150  
##  Mean   : 3.943  
##  3rd Qu.: 4.950  
##  Max.   :19.100  
##  NA's   :3459    
## Count (and %) of NAs in Lactate column: 3459 (96.76 %)

##       Gluc      
##  Min.   : 31.0  
##  1st Qu.: 95.5  
##  Median :125.0  
##  Mean   :141.9  
##  3rd Qu.:175.0  
##  Max.   :440.0  
##  NA's   :3456   
## Count (and %) of NAs in Gluc column: 3456 (96.67 %)

##       iCal     
##  Min.   :0.42  
##  1st Qu.:1.16  
##  Median :1.25  
##  Mean   :1.26  
##  3rd Qu.:1.34  
##  Max.   :2.72  
##  NA's   :12    
## Count (and %) of NAs in iCal column: 12 (0.34 %)

##      Bicarb     
##  Min.   : 1.60  
##  1st Qu.:20.80  
##  Median :24.50  
##  Mean   :25.04  
##  3rd Qu.:28.90  
##  Max.   :58.00  
##  NA's   :35     
## Count (and %) of NAs in Bicarb column: 35 (0.98 %)

##        pH       
##  Min.   :6.632  
##  1st Qu.:7.251  
##  Median :7.335  
##  Mean   :7.318  
##  3rd Qu.:7.401  
##  Max.   :7.884  
##  NA's   :14     
## Count (and %) of NAs in pH column: 14 (0.39 %)
## Number (%) of `WELL MATCHED`: 2859 (79.97 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1605   0.6003   0.6442   0.6906   0.9104  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -6.360e+00  2.599e+00  -2.447  0.01439 * 
## NUM_VAL.y    3.444e-03  1.515e-02   0.227  0.82023   
## pH           1.030e-01  3.467e-02   2.969  0.00298 **
## Gluc         7.317e-04  3.734e-03   0.196  0.84464   
## Bicarb      -1.342e-02  6.883e-03  -1.950  0.05118 . 
## iCal         1.859e-01  2.434e-01   0.764  0.44511   
## Lactate      1.479e-02  5.817e-02   0.254  0.79929   
## AGE_PROC    -3.294e-06  2.258e-05  -0.146  0.88403   
## DEPTPICU     2.445e-01  1.066e-01   2.294  0.02181 * 
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3580.6  on 3574  degrees of freedom
## Residual deviance: 3564.6  on 3566  degrees of freedom
## AIC: 3582.6
## 
## Number of Fisher Scoring iterations: 4
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
## 0.001730022 1.003449830 1.108444456 1.000731926 0.986668058 1.204252603 
##     Lactate    AGE_PROC    DEPTPICU 
## 1.014899926 0.999996706 1.277027204
## Waiting for profiling to be done...

##                    2.5 %   97.5 %
## (Intercept) 1.078705e-05 0.288014
## NUM_VAL.y   9.741428e-01 1.033785
## pH          1.035271e+00 1.186077
## Gluc        9.938440e-01 1.008658
## Bicarb      9.734956e-01 1.000130
## iCal        7.503648e-01 1.948778
## Lactate     9.157862e-01 1.159105
## AGE_PROC    9.999528e-01 1.000041
## DEPTPICU    1.035634e+00 1.573119
## Pre-Range Check Length: 3575
## Cutoff value used: 7
## X :: Pos: 0.04   Neg: 0.96
## Y :: Pos: 0.06   Neg: 0.94
##  P.pos: 0.0020
##  P.neg: 0.9103
##  P.obs: 0.96
##  P.exp: 0.91
## Kappa: 0.56
## Pre-Range Check Length: 3575
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0000
##  P.neg: 0.9911
##  P.obs: 0.99
##  P.exp: 0.99
## Kappa: 0.18
## Pre-Range Check Length: 3575
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.22   Neg: 0.78
##  P.pos: 0.0440
##  P.neg: 0.6242
##  P.obs: 0.90
##  P.exp: 0.67
## Kappa: 0.70
## Total number of input rows: 3575
## Gold Standard:
##  Positive: 130 (3.64 %)
##  Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 95 (2.6573 %)    FP: 103 (2.8811 %)
## FN: 35 (0.9790 %)    TN: 3342 (93.4825 %)
## Sens: 0.7308
## Spec: 0.9701
## PPV: 0.4798
## NPV: 0.9896
## FOR (1-NPV): 0.0104
## NNM (1/FOR): 96.4857
## 
## Total number of input rows: 3575
## Gold Standard:
##  Positive: 130 (3.64 %)
##  Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 102 (2.8531 %)   FP: 157 (4.3916 %)
## FN: 28 (0.7832 %)    TN: 3288 (91.9720 %)
## Sens: 0.7846
## Spec: 0.9544
## PPV: 0.3938
## NPV: 0.9916
## FOR (1-NPV): 0.0084
## NNM (1/FOR): 118.4286
## 
## Total number of input rows: 3575
## Gold Standard:
##  Positive: 130 (3.64 %)
##  Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 117 (3.2727 %)   FP: 314 (8.7832 %)
## FN: 13 (0.3636 %)    TN: 3131 (87.5804 %)
## Sens: 0.9000
## Spec: 0.9089
## PPV: 0.2715
## NPV: 0.9959
## FOR (1-NPV): 0.0041
## NNM (1/FOR): 241.8462
## 
## Total number of input rows: 3575
## Gold Standard:
##  Positive: 130 (3.64 %)
##  Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 120 (3.3566 %)   FP: 418 (11.6923 %)
## FN: 10 (0.2797 %)    TN: 3027 (84.6713 %)
## Sens: 0.9231
## Spec: 0.8787
## PPV: 0.2230
## NPV: 0.9967
## FOR (1-NPV): 0.0033
## NNM (1/FOR): 303.7000
## 
## Total number of input rows: 3575
## Gold Standard:
##  Positive: 130 (3.64 %)
##  Negative: 3445 (96.36 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 124 (3.4685 %)   FP: 654 (18.2937 %)
## FN: 6 (0.1678 %) TN: 2791 (78.0699 %)
## Sens: 0.9538
## Spec: 0.8102
## PPV: 0.1594
## NPV: 0.9979
## FOR (1-NPV): 0.0021
## NNM (1/FOR): 466.1667
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.955 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.855 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.135 </td>
##    <td style="text-align:right;"> 7.8 </td>
##    <td style="text-align:right;"> 0.900 </td>
##    <td style="text-align:right;"> 0.909 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.272 </td>
##    <td style="text-align:right;"> 8.8 </td>
##    <td style="text-align:right;"> 0.833 </td>
##    <td style="text-align:right;"> 0.784 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).

Sensitivity Analyses

Now we complete some of the same above measures using different permutations, as sensitivity analyses (same as we did in 02_Analytic_Accuracy.Rmd).

Single Value per Patient

First we change the parameters to require a single value per patient. We run this across both BG and ISTAT pairs (if they exist).

# Sensitivity analysis: restrict to a single result per patient, for each 
# comparison procedure that is present in the labs data
for (proc.option in c('BG', 'ISTAT')) {

  # Skip procedures this site does not have
  if (!(proc.option %in% labs.df$PROC_NAME)) next

  runAllClinical(
    labs.df = labs.df,
    cohort.df = cohort.df,
    compare.PN = proc.option,
    time.diff = primary.cutoff,
    multi.per.pt = FALSE, # This is the change in this section
    primary.hgb.cutoff = primary.hgb.cutoff,
    sens.hgb.cutoffs = sens.hgb.cutoffs,
    run.date = run.date,
    save.fn = paste0('single_pt_cbc_', tolower(proc.option), '_clinical')
  )
}
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 72997
## Number of non-duplicated first PROC_NAME rows: 67141
## Number of non-duplicated second PROC_NAME rows: 67077
## Number of paired, simultaneous values: 9511
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 9233 (97.1 %)
##  Box B: 9511 (100.0 %)
##  Box C: 3 (0.0 %)
##  Box D: 4 (0.0 %)
## Counts by Area:
##  Green Area: 9233 (97.08 %)
##  Yellow Area: 271 (2.85 %)
##  Red Area: 7 (0.07 %)

## NUmber of unique PN[1] order procedure keys: 9511
##     Lactate      
##  Min.   : 0.500  
##  1st Qu.: 1.100  
##  Median : 1.600  
##  Mean   : 2.267  
##  3rd Qu.: 2.500  
##  Max.   :27.200  
##  NA's   :3217    
## Count (and %) of NAs in Lactate column: 3217 (33.82 %)

##       Gluc       
##  Min.   :  13.0  
##  1st Qu.:  99.0  
##  Median : 121.0  
##  Mean   : 135.9  
##  3rd Qu.: 152.0  
##  Max.   :1464.0  
##  NA's   :796     
## Count (and %) of NAs in Gluc column: 796 (8.37 %)

##       iCal      
##  Min.   :0.410  
##  1st Qu.:1.110  
##  Median :1.170  
##  Mean   :1.167  
##  3rd Qu.:1.220  
##  Max.   :2.600  
##  NA's   :786    
## Count (and %) of NAs in iCal column: 786 (8.26 %)

##      Bicarb     
##  Min.   : 1.80  
##  1st Qu.:21.60  
##  Median :24.20  
##  Mean   :24.34  
##  3rd Qu.:27.00  
##  Max.   :59.50  
##  NA's   :3      
## Count (and %) of NAs in Bicarb column: 3 (0.03 %)

##        pH       
##  Min.   :6.605  
##  1st Qu.:7.333  
##  Median :7.378  
##  Mean   :7.367  
##  3rd Qu.:7.416  
##  Max.   :7.689  
##  NA's   :2      
## Count (and %) of NAs in pH column: 2 (0.02 %)
## Number (%) of `WELL MATCHED`: 9079 (95.46 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1144   0.2078   0.3070   0.3571   0.8432  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  1.174e+01  4.833e+00   2.428  0.01518 *  
## NUM_VAL.y    9.355e-02  2.335e-02   4.006 6.17e-05 ***
## pH          -1.045e-01  6.420e-02  -1.627  0.10364    
## Gluc        -8.182e-04  7.226e-04  -1.132  0.25752    
## Bicarb       8.562e-03  1.077e-02   0.795  0.42683    
## iCal        -1.312e+00  4.551e-01  -2.883  0.00394 ** 
## Lactate      1.638e-02  2.873e-02   0.570  0.56865    
## AGE_PROC    -5.347e-05  2.465e-05  -2.169  0.03009 *  
## DEPTPICU    -8.241e-01  1.416e-01  -5.822 5.83e-09 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 3515.4  on 9510  degrees of freedom
## Residual deviance: 3395.4  on 9502  degrees of freedom
## AIC: 3413.4
## 
## Number of Fisher Scoring iterations: 6
## 
##  (Intercept)    NUM_VAL.y           pH         Gluc       Bicarb         iCal 
## 1.249565e+05 1.098064e+00 9.007870e-01 9.991822e-01 1.008598e+00 2.693275e-01 
##      Lactate     AGE_PROC     DEPTPICU 
## 1.016510e+00 9.999465e-01 4.386421e-01
## Waiting for profiling to be done...

##                  2.5 %       97.5 %
## (Intercept) 10.8417694 1.827967e+09
## NUM_VAL.y    1.0491188 1.149685e+00
## pH           0.7929680 1.019832e+00
## Gluc         0.9978556 1.000701e+00
## Bicarb       0.9878004 1.030392e+00
## iCal         0.1124201 6.734354e-01
## Lactate      0.9641851 1.079449e+00
## AGE_PROC     0.9998985 9.999952e-01
## DEPTPICU     0.3307551 5.763717e-01
## Pre-Range Check Length: 9511
## Cutoff value used: 7
## X :: Pos: 0.02   Neg: 0.98
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0002
##  P.neg: 0.9733
##  P.obs: 0.99
##  P.exp: 0.97
## Kappa: 0.66
## Pre-Range Check Length: 9511
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.00   Neg: 1.00
##  P.pos: 0.0000
##  P.neg: 0.9967
##  P.obs: 1.00
##  P.exp: 1.00
## Kappa: 0.71
## Pre-Range Check Length: 9511
## Cutoff value used: 9
## X :: Pos: 0.13   Neg: 0.87
## Y :: Pos: 0.08   Neg: 0.92
##  P.pos: 0.0109
##  P.neg: 0.7970
##  P.obs: 0.95
##  P.exp: 0.81
## Kappa: 0.71
## Total number of input rows: 9511
## Gold Standard:
##  Positive: 164 (1.72 %)
##  Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 85 (0.8937 %)    FP: 7 (0.0736 %)
## FN: 79 (0.8306 %)    TN: 9340 (98.2021 %)
## Sens: 0.5183
## Spec: 0.9993
## PPV: 0.9239
## NPV: 0.9916
## FOR (1-NPV): 0.0084
## NNM (1/FOR): 119.2278
## 
## Total number of input rows: 9511
## Gold Standard:
##  Positive: 164 (1.72 %)
##  Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 130 (1.3668 %)   FP: 52 (0.5467 %)
## FN: 34 (0.3575 %)    TN: 9295 (97.7289 %)
## Sens: 0.7927
## Spec: 0.9944
## PPV: 0.7143
## NPV: 0.9964
## FOR (1-NPV): 0.0036
## NNM (1/FOR): 274.3824
## 
## Total number of input rows: 9511
## Gold Standard:
##  Positive: 164 (1.72 %)
##  Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 152 (1.5981 %)   FP: 151 (1.5876 %)
## FN: 12 (0.1262 %)    TN: 9196 (96.6880 %)
## Sens: 0.9268
## Spec: 0.9838
## PPV: 0.5017
## NPV: 0.9987
## FOR (1-NPV): 0.0013
## NNM (1/FOR): 767.3333
## 
## Total number of input rows: 9511
## Gold Standard:
##  Positive: 164 (1.72 %)
##  Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 153 (1.6087 %)   FP: 342 (3.5958 %)
## FN: 11 (0.1157 %)    TN: 9005 (94.6798 %)
## Sens: 0.9329
## Spec: 0.9634
## PPV: 0.3091
## NPV: 0.9988
## FOR (1-NPV): 0.0012
## NNM (1/FOR): 819.6364
## 
## Total number of input rows: 9511
## Gold Standard:
##  Positive: 164 (1.72 %)
##  Negative: 9347 (98.28 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 155 (1.6297 %)   FP: 635 (6.6765 %)
## FN: 9 (0.0946 %) TN: 8712 (91.5992 %)
## Sens: 0.9451
## Spec: 0.9321
## PPV: 0.1962
## NPV: 0.9990
## FOR (1-NPV): 0.0010
## NNM (1/FOR): 969.0000
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.975 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.983 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.074 </td>
##    <td style="text-align:right;"> 8.3 </td>
##    <td style="text-align:right;"> 0.933 </td>
##    <td style="text-align:right;"> 0.969 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.058 </td>
##    <td style="text-align:right;"> 7.7 </td>
##    <td style="text-align:right;"> 0.947 </td>
##    <td style="text-align:right;"> 0.976 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 602 rows containing missing values (geom_point).
## Warning: Removed 602 row(s) containing missing values (geom_path).

## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 3802
## Number of non-duplicated first PROC_NAME rows: 3581
## Number of non-duplicated second PROC_NAME rows: 3575
## Number of paired, simultaneous values: 2359
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 2131 (90.3 %)
##  Box B: 2359 (100.0 %)
##  Box C: 3 (0.1 %)
##  Box D: 5 (0.2 %)
## Counts by Area:
##  Green Area: 2131 (90.33 %)
##  Yellow Area: 220 (9.33 %)
##  Red Area: 8 (0.34 %)

## NUmber of unique PN[1] order procedure keys: 2359
##     Lactate      
##  Min.   : 0.600  
##  1st Qu.: 1.200  
##  Median : 2.100  
##  Mean   : 3.595  
##  3rd Qu.: 4.300  
##  Max.   :16.100  
##  NA's   :2273    
## Count (and %) of NAs in Lactate column: 2273 (96.35 %)

##       Gluc      
##  Min.   : 31.0  
##  1st Qu.: 91.0  
##  Median :121.0  
##  Mean   :135.1  
##  3rd Qu.:167.0  
##  Max.   :339.0  
##  NA's   :2270   
## Count (and %) of NAs in Gluc column: 2270 (96.23 %)

##       iCal      
##  Min.   :0.420  
##  1st Qu.:1.170  
##  Median :1.250  
##  Mean   :1.258  
##  3rd Qu.:1.340  
##  Max.   :2.490  
##  NA's   :7      
## Count (and %) of NAs in iCal column: 7 (0.30 %)

##      Bicarb     
##  Min.   : 1.60  
##  1st Qu.:20.12  
##  Median :23.70  
##  Mean   :24.17  
##  3rd Qu.:27.70  
##  Max.   :56.80  
##  NA's   :21     
## Count (and %) of NAs in Bicarb column: 21 (0.89 %)

##        pH       
##  Min.   :6.661  
##  1st Qu.:7.241  
##  Median :7.326  
##  Mean   :7.311  
##  3rd Qu.:7.396  
##  Max.   :7.884  
##  NA's   :8      
## Count (and %) of NAs in pH column: 8 (0.34 %)
## Number (%) of `WELL MATCHED`: 1943 (82.37 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.0906   0.5595   0.6016   0.6430   0.8063  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)  
## (Intercept) -5.532e+00  3.384e+00  -1.635   0.1021  
## NUM_VAL.y   -2.619e-02  1.962e-02  -1.335   0.1819  
## pH           9.020e-02  4.468e-02   2.019   0.0435 *
## Gluc         4.373e-03  5.460e-03   0.801   0.4232  
## Bicarb      -1.026e-02  9.213e-03  -1.114   0.2654  
## iCal         3.225e-01  3.406e-01   0.947   0.3437  
## Lactate     -9.694e-03  7.023e-02  -0.138   0.8902  
## AGE_PROC     2.056e-05  2.925e-05   0.703   0.4821  
## DEPTPICU     1.263e-01  1.442e-01   0.876   0.3812  
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 2197.7  on 2358  degrees of freedom
## Residual deviance: 2186.3  on 2350  degrees of freedom
## AIC: 2204.3
## 
## Number of Fisher Scoring iterations: 4
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
## 0.003956617 0.974153798 1.094397656 1.004382158 0.989791192 1.380556839 
##     Lactate    AGE_PROC    DEPTPICU 
## 0.990353153 1.000020560 1.134594223
## Waiting for profiling to be done...

##                    2.5 %   97.5 %
## (Intercept) 5.404230e-06 3.147582
## NUM_VAL.y   9.374504e-01 1.012414
## pH          1.001967e+00 1.193919
## Gluc        9.945402e-01 1.016167
## Bicarb      9.721857e-01 1.007957
## iCal        7.133390e-01 2.710512
## Lactate     8.723080e-01 1.164277
## AGE_PROC    9.999638e-01 1.000079
## DEPTPICU    8.538870e-01 1.503257
## Pre-Range Check Length: 2359
## Cutoff value used: 7
## X :: Pos: 0.03   Neg: 0.97
## Y :: Pos: 0.05   Neg: 0.95
##  P.pos: 0.0016
##  P.neg: 0.9210
##  P.obs: 0.97
##  P.exp: 0.92
## Kappa: 0.59
## Pre-Range Check Length: 2359
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.00   Neg: 1.00
##  P.pos: 0.0000
##  P.neg: 0.9911
##  P.obs: 0.99
##  P.exp: 0.99
## Kappa: 0.19
## Pre-Range Check Length: 2359
## Cutoff value used: 9
## X :: Pos: 0.19   Neg: 0.81
## Y :: Pos: 0.20   Neg: 0.80
##  P.pos: 0.0367
##  P.neg: 0.6535
##  P.obs: 0.91
##  P.exp: 0.69
## Kappa: 0.72
## Total number of input rows: 2359
## Gold Standard:
##  Positive: 80 (3.39 %)
##  Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 58 (2.4587 %)    FP: 52 (2.2043 %)
## FN: 22 (0.9326 %)    TN: 2227 (94.4044 %)
## Sens: 0.7250
## Spec: 0.9772
## PPV: 0.5273
## NPV: 0.9902
## FOR (1-NPV): 0.0098
## NNM (1/FOR): 102.2273
## 
## Total number of input rows: 2359
## Gold Standard:
##  Positive: 80 (3.39 %)
##  Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 62 (2.6282 %)    FP: 87 (3.6880 %)
## FN: 18 (0.7630 %)    TN: 2192 (92.9207 %)
## Sens: 0.7750
## Spec: 0.9618
## PPV: 0.4161
## NPV: 0.9919
## FOR (1-NPV): 0.0081
## NNM (1/FOR): 122.7778
## 
## Total number of input rows: 2359
## Gold Standard:
##  Positive: 80 (3.39 %)
##  Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 72 (3.0521 %)    FP: 174 (7.3760 %)
## FN: 8 (0.3391 %) TN: 2105 (89.2327 %)
## Sens: 0.9000
## Spec: 0.9237
## PPV: 0.2927
## NPV: 0.9962
## FOR (1-NPV): 0.0038
## NNM (1/FOR): 264.1250
## 
## Total number of input rows: 2359
## Gold Standard:
##  Positive: 80 (3.39 %)
##  Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 73 (3.0945 %)    FP: 233 (9.8771 %)
## FN: 7 (0.2967 %) TN: 2046 (86.7317 %)
## Sens: 0.9125
## Spec: 0.8978
## PPV: 0.2386
## NPV: 0.9966
## FOR (1-NPV): 0.0034
## NNM (1/FOR): 293.2857
## 
## Total number of input rows: 2359
## Gold Standard:
##  Positive: 80 (3.39 %)
##  Negative: 2279 (96.61 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 76 (3.2217 %)    FP: 388 (16.4476 %)
## FN: 4 (0.1696 %) TN: 1891 (80.1611 %)
## Sens: 0.9500
## Spec: 0.8297
## PPV: 0.1638
## NPV: 0.9979
## FOR (1-NPV): 0.0021
## NNM (1/FOR): 473.7500
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.954 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.857 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.126 </td>
##    <td style="text-align:right;"> 7.8 </td>
##    <td style="text-align:right;"> 0.9 </td>
##    <td style="text-align:right;"> 0.924 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.237 </td>
##    <td style="text-align:right;"> 8.2 </td>
##    <td style="text-align:right;"> 0.8 </td>
##    <td style="text-align:right;"> 0.873 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).

Time Threshold

Now we vary the cutoff (in minutes) between collection times that determines which pairs of labs are counted as “simultaneous”. We revert to allowing multiple values per patient. We run this for both BG and ISTAT pairs (if they exist in the data), across all sens.cutoffs values.

# Time-threshold sensitivity analysis: re-run the full clinical accuracy
# pipeline once per sensitivity cutoff (minutes between collection times)
# and once per comparison procedure.
for (cutoff in sens.cutoffs) {

  for (proc.option in c('BG', 'ISTAT')) {
    
    # Only run for procedures actually present in this site's lab data
    if (proc.option %in% unique(labs.df$PROC_NAME)) {
      
      runAllClinical(
        labs.df,
        cohort.df, 
        compare.PN = proc.option,
        time.diff = cutoff,
        # Use the reserved word TRUE rather than T, which can be reassigned
        multi.per.pt = TRUE,
        primary.hgb.cutoff = primary.hgb.cutoff,
        sens.hgb.cutoffs = sens.hgb.cutoffs,
        run.date = run.date,
        # NOTE(review): NA presumably means no output file is written for
        # these sensitivity runs - confirm against runAllClinical()
        save.fn = NA
      )

    } # If the PROC exists
    
  } # Across BG vs ISTAT
  
} # Across cutoffs
## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 60273
## Number of non-duplicated first PROC_NAME rows: 58865
## Number of non-duplicated second PROC_NAME rows: 58817
## Number of paired, simultaneous values: 58817
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 55736 (94.8 %)
##  Box B: 58817 (100.0 %)
##  Box C: 39 (0.1 %)
##  Box D: 22 (0.0 %)
## Counts by Area:
##  Green Area: 55736 (94.76 %)
##  Yellow Area: 3020 (5.13 %)
##  Red Area: 61 (0.10 %)

## NUmber of unique PN[1] order procedure keys: 58817
##     Lactate      
##  Min.   : 0.500  
##  1st Qu.: 1.100  
##  Median : 1.500  
##  Mean   : 2.031  
##  3rd Qu.: 2.100  
##  Max.   :27.200  
##  NA's   :14846   
## Count (and %) of NAs in Lactate column: 14846 (25.24 %)

##       Gluc       
##  Min.   :  13.0  
##  1st Qu.:  97.0  
##  Median : 115.0  
##  Mean   : 127.3  
##  3rd Qu.: 140.0  
##  Max.   :3008.0  
##  NA's   :4957    
## Count (and %) of NAs in Gluc column: 4957 (8.43 %)

##       iCal      
##  Min.   :0.270  
##  1st Qu.:1.120  
##  Median :1.170  
##  Mean   :1.177  
##  3rd Qu.:1.230  
##  Max.   :2.740  
##  NA's   :4907   
## Count (and %) of NAs in iCal column: 4907 (8.34 %)

##      Bicarb     
##  Min.   : 1.80  
##  1st Qu.:24.00  
##  Median :27.40  
##  Mean   :28.22  
##  3rd Qu.:31.80  
##  Max.   :82.60  
##  NA's   :4      
## Count (and %) of NAs in Bicarb column: 4 (0.01 %)

##        pH        
##  Min.   : 6.615  
##  1st Qu.: 7.354  
##  Median : 7.398  
##  Mean   : 7.393  
##  3rd Qu.: 7.437  
##  Max.   :37.600  
##  NA's   :4       
## Count (and %) of NAs in pH column: 4 (0.01 %)
## Number (%) of `WELL MATCHED`: 54537 (92.72 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.0129   0.2454   0.3975   0.4401   1.0351  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.767e+00  5.394e-01   6.984 2.88e-12 ***
## NUM_VAL.y    3.001e-02  8.130e-03   3.691 0.000223 ***
## pH          -6.668e-03  6.459e-03  -1.032 0.301899    
## Gluc        -6.591e-04  2.550e-04  -2.585 0.009750 ** 
## Bicarb       1.993e-03  2.495e-03   0.799 0.424454    
## iCal        -2.219e-01  1.616e-01  -1.373 0.169765    
## Lactate      9.030e-02  1.250e-02   7.226 4.98e-13 ***
## AGE_PROC    -1.055e-04  7.689e-06 -13.719  < 2e-16 ***
## DEPTPICU    -9.561e-01  4.807e-02 -19.892  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 30672  on 58816  degrees of freedom
## Residual deviance: 29472  on 58808  degrees of freedom
## AIC: 29490
## 
## Number of Fisher Scoring iterations: 6
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
##  43.2552951   1.0304637   0.9933539   0.9993412   1.0019950   0.8010318 
##     Lactate    AGE_PROC    DEPTPICU 
##   1.0944984   0.9998945   0.3843909
## Waiting for profiling to be done...

##                  2.5 %      97.5 %
## (Intercept) 11.0130381 140.1854815
## NUM_VAL.y    1.0142103   1.0470525
## pH           0.9789048   1.0107766
## Gluc         0.9988511   0.9998556
## Bicarb       0.9971292   1.0069301
## iCal         0.5844406   1.1010312
## Lactate      1.0687174   1.1223763
## AGE_PROC     0.9998795   0.9999096
## DEPTPICU     0.3496007   0.4220938
## Pre-Range Check Length: 58817
## Cutoff value used: 7
## X :: Pos: 0.02   Neg: 0.98
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0001
##  P.neg: 0.9753
##  P.obs: 0.99
##  P.exp: 0.98
## Kappa: 0.51
## Pre-Range Check Length: 58817
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.00   Neg: 1.00
##  P.pos: 0.0000
##  P.neg: 0.9985
##  P.obs: 1.00
##  P.exp: 1.00
## Kappa: 0.37
## Pre-Range Check Length: 58817
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.12   Neg: 0.88
##  P.pos: 0.0252
##  P.neg: 0.6980
##  P.obs: 0.91
##  P.exp: 0.72
## Kappa: 0.66
## Total number of input rows: 58817
## Gold Standard:
##  Positive: 1002 (1.70 %)
##  Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 374 (0.6359 %)   FP: 87 (0.1479 %)
## FN: 628 (1.0677 %)   TN: 57728 (98.1485 %)
## Sens: 0.3733
## Spec: 0.9985
## PPV: 0.8113
## NPV: 0.9892
## FOR (1-NPV): 0.0108
## NNM (1/FOR): 92.9236
## 
## Total number of input rows: 58817
## Gold Standard:
##  Positive: 1002 (1.70 %)
##  Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 665 (1.1306 %)   FP: 449 (0.7634 %)
## FN: 337 (0.5730 %)   TN: 57366 (97.5330 %)
## Sens: 0.6637
## Spec: 0.9922
## PPV: 0.5969
## NPV: 0.9942
## FOR (1-NPV): 0.0058
## NNM (1/FOR): 171.2255
## 
## Total number of input rows: 58817
## Gold Standard:
##  Positive: 1002 (1.70 %)
##  Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 794 (1.3499 %)   FP: 1553 (2.6404 %)
## FN: 208 (0.3536 %)   TN: 56262 (95.6560 %)
## Sens: 0.7924
## Spec: 0.9731
## PPV: 0.3383
## NPV: 0.9963
## FOR (1-NPV): 0.0037
## NNM (1/FOR): 271.4904
## 
## Total number of input rows: 58817
## Gold Standard:
##  Positive: 1002 (1.70 %)
##  Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 850 (1.4452 %)   FP: 3539 (6.0170 %)
## FN: 152 (0.2584 %)   TN: 54276 (92.2794 %)
## Sens: 0.8483
## Spec: 0.9388
## PPV: 0.1937
## NPV: 0.9972
## FOR (1-NPV): 0.0028
## NNM (1/FOR): 358.0789
## 
## Total number of input rows: 58817
## Gold Standard:
##  Positive: 1002 (1.70 %)
##  Negative: 57815 (98.30 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 884 (1.5030 %)   FP: 6422 (10.9186 %)
## FN: 118 (0.2006 %)   TN: 51393 (87.3778 %)
## Sens: 0.8822
## Spec: 0.8889
## PPV: 0.1210
## NPV: 0.9977
## FOR (1-NPV): 0.0023
## NNM (1/FOR): 436.5339
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.941 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.777 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.157 </td>
##    <td style="text-align:right;"> 8.7 </td>
##    <td style="text-align:right;"> 0.871 </td>
##    <td style="text-align:right;"> 0.910 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.391 </td>
##    <td style="text-align:right;"> 8.7 </td>
##    <td style="text-align:right;"> 0.623 </td>
##    <td style="text-align:right;"> 0.898 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).

## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 279
## Number of non-duplicated first PROC_NAME rows: 277
## Number of non-duplicated second PROC_NAME rows: 276
## Number of paired, simultaneous values: 276
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 247 (89.5 %)
##  Box B: 276 (100.0 %)
##  Box C: 0 (0.0 %)
##  Box D: 1 (0.4 %)
## Counts by Area:
##  Green Area: 247 (89.49 %)
##  Yellow Area: 28 (10.14 %)
##  Red Area: 1 (0.36 %)

## NUmber of unique PN[1] order procedure keys: 276
##     Lactate      
##  Min.   : 0.600  
##  1st Qu.: 1.300  
##  Median : 2.100  
##  Mean   : 3.701  
##  3rd Qu.: 4.000  
##  Max.   :19.100  
##  NA's   :190     
## Count (and %) of NAs in Lactate column: 190 (68.84 %)

##       Gluc      
##  Min.   : 31.0  
##  1st Qu.: 90.0  
##  Median :125.0  
##  Mean   :137.6  
##  3rd Qu.:171.0  
##  Max.   :326.0  
##  NA's   :187    
## Count (and %) of NAs in Gluc column: 187 (67.75 %)

##       iCal      
##  Min.   :0.800  
##  1st Qu.:1.150  
##  Median :1.230  
##  Mean   :1.239  
##  3rd Qu.:1.320  
##  Max.   :2.420  
##  NA's   :2      
## Count (and %) of NAs in iCal column: 2 (0.72 %)

##      Bicarb     
##  Min.   : 9.80  
##  1st Qu.:20.50  
##  Median :24.70  
##  Mean   :25.23  
##  3rd Qu.:29.20  
##  Max.   :54.30  
##  NA's   :1      
## Count (and %) of NAs in Bicarb column: 1 (0.36 %)

##        pH       
##  Min.   :6.737  
##  1st Qu.:7.240  
##  Median :7.330  
##  Mean   :7.314  
##  3rd Qu.:7.402  
##  Max.   :7.577  
## Count (and %) of NAs in pH column: 0 (0.00 %)
## Number (%) of `WELL MATCHED`: 225 (81.52 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.4999   0.3571   0.5087   0.6614   1.2940  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)   
## (Intercept) -2.335e+01  1.033e+01  -2.260  0.02382 * 
## NUM_VAL.y    8.391e-02  5.481e-02   1.531  0.12577   
## pH           3.188e-01  1.407e-01   2.266  0.02348 * 
## Gluc         2.786e-03  6.128e-03   0.455  0.64933   
## Bicarb      -6.305e-02  2.563e-02  -2.460  0.01388 * 
## iCal         9.825e-01  1.009e+00   0.974  0.33024   
## Lactate      3.792e-02  9.010e-02   0.421  0.67384   
## AGE_PROC    -1.120e-04  8.403e-05  -1.333  0.18256   
## DEPTPICU     1.217e+00  3.842e-01   3.169  0.00153 **
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 264.17  on 275  degrees of freedom
## Residual deviance: 243.85  on 267  degrees of freedom
## AIC: 261.85
## 
## Number of Fisher Scoring iterations: 5
## 
##  (Intercept)    NUM_VAL.y           pH         Gluc       Bicarb         iCal 
## 7.248423e-11 1.087527e+00 1.375528e+00 1.002790e+00 9.388928e-01 2.671011e+00 
##      Lactate     AGE_PROC     DEPTPICU 
## 1.038650e+00 9.998880e-01 3.378728e+00
## Waiting for profiling to be done...

##                    2.5 %      97.5 %
## (Intercept) 7.297754e-20  0.03840205
## NUM_VAL.y   9.781522e-01  1.21391908
## pH          1.045631e+00  1.82310980
## Gluc        9.915373e-01  1.01568477
## Bicarb      8.923233e-01  0.98747316
## iCal        4.197485e-01 22.08604616
## Lactate     8.950642e-01  1.29999755
## AGE_PROC    9.997242e-01  1.00005595
## DEPTPICU    1.599281e+00  7.26578660
## Pre-Range Check Length: 276
## Cutoff value used: 7
## X :: Pos: 0.04   Neg: 0.96
## Y :: Pos: 0.05   Neg: 0.95
##  P.pos: 0.0018
##  P.neg: 0.9149
##  P.obs: 0.97
##  P.exp: 0.92
## Kappa: 0.65
## Pre-Range Check Length: 276
## Cutoff value used: 5
## X :: Pos: 0.01   Neg: 0.99
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0001
##  P.neg: 0.9856
##  P.obs: 0.99
##  P.exp: 0.99
## Kappa: 0.50
## Pre-Range Check Length: 276
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.20   Neg: 0.80
##  P.pos: 0.0412
##  P.neg: 0.6354
##  P.obs: 0.93
##  P.exp: 0.68
## Kappa: 0.78
## Total number of input rows: 276
## Gold Standard:
##  Positive: 10 (3.62 %)
##  Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 8 (2.8986 %) FP: 6 (2.1739 %)
## FN: 2 (0.7246 %) TN: 260 (94.2029 %)
## Sens: 0.8000
## Spec: 0.9774
## PPV: 0.5714
## NPV: 0.9924
## FOR (1-NPV): 0.0076
## NNM (1/FOR): 131.0000
## 
## Total number of input rows: 276
## Gold Standard:
##  Positive: 10 (3.62 %)
##  Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 8 (2.8986 %) FP: 11 (3.9855 %)
## FN: 2 (0.7246 %) TN: 255 (92.3913 %)
## Sens: 0.8000
## Spec: 0.9586
## PPV: 0.4211
## NPV: 0.9922
## FOR (1-NPV): 0.0078
## NNM (1/FOR): 128.5000
## 
## Total number of input rows: 276
## Gold Standard:
##  Positive: 10 (3.62 %)
##  Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 9 (3.2609 %) FP: 25 (9.0580 %)
## FN: 1 (0.3623 %) TN: 241 (87.3188 %)
## Sens: 0.9000
## Spec: 0.9060
## PPV: 0.2647
## NPV: 0.9959
## FOR (1-NPV): 0.0041
## NNM (1/FOR): 242.0000
## 
## Total number of input rows: 276
## Gold Standard:
##  Positive: 10 (3.62 %)
##  Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 9 (3.2609 %) FP: 34 (12.3188 %)
## FN: 1 (0.3623 %) TN: 232 (84.0580 %)
## Sens: 0.9000
## Spec: 0.8722
## PPV: 0.2093
## NPV: 0.9957
## FOR (1-NPV): 0.0043
## NNM (1/FOR): 233.0000
## 
## Total number of input rows: 276
## Gold Standard:
##  Positive: 10 (3.62 %)
##  Negative: 266 (96.38 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 10 (3.6232 %)    FP: 46 (16.6667 %)
## FN: 0 (0.0000 %) TN: 220 (79.7101 %)
## Sens: 1.0000
## Spec: 0.8271
## PPV: 0.1786
## NPV: 1.0000
## FOR (1-NPV): 0.0000
## NNM (1/FOR): Inf
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.970 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.901 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.137 </td>
##    <td style="text-align:right;"> 7.8 </td>
##    <td style="text-align:right;"> 0.9 </td>
##    <td style="text-align:right;"> 0.906 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.197 </td>
##    <td style="text-align:right;"> 8.8 </td>
##    <td style="text-align:right;"> 1.0 </td>
##    <td style="text-align:right;"> 0.803 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 742 rows containing missing values (geom_point).
## Warning: Removed 742 row(s) containing missing values (geom_path).

## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 76385
## Number of non-duplicated first PROC_NAME rows: 69438
## Number of non-duplicated second PROC_NAME rows: 69336
## Number of paired, simultaneous values: 69336
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 65674 (94.7 %)
##  Box B: 69336 (100.0 %)
##  Box C: 47 (0.1 %)
##  Box D: 32 (0.0 %)
## Counts by Area:
##  Green Area: 65674 (94.72 %)
##  Yellow Area: 3583 (5.17 %)
##  Red Area: 79 (0.11 %)

## NUmber of unique PN[1] order procedure keys: 69336
##     Lactate      
##  Min.   : 0.500  
##  1st Qu.: 1.100  
##  Median : 1.500  
##  Mean   : 2.086  
##  3rd Qu.: 2.200  
##  Max.   :27.200  
##  NA's   :17862   
## Count (and %) of NAs in Lactate column: 17862 (25.76 %)

##       Gluc       
##  Min.   :  13.0  
##  1st Qu.:  97.0  
##  Median : 115.0  
##  Mean   : 127.8  
##  3rd Qu.: 141.0  
##  Max.   :3008.0  
##  NA's   :5831    
## Count (and %) of NAs in Gluc column: 5831 (8.41 %)

##       iCal      
##  Min.   :0.270  
##  1st Qu.:1.110  
##  Median :1.170  
##  Mean   :1.177  
##  3rd Qu.:1.230  
##  Max.   :2.740  
##  NA's   :5778   
## Count (and %) of NAs in iCal column: 5778 (8.33 %)

##      Bicarb     
##  Min.   : 1.80  
##  1st Qu.:23.90  
##  Median :27.30  
##  Mean   :28.08  
##  3rd Qu.:31.60  
##  Max.   :82.60  
##  NA's   :6      
## Count (and %) of NAs in Bicarb column: 6 (0.01 %)

##        pH        
##  Min.   : 6.605  
##  1st Qu.: 7.353  
##  Median : 7.398  
##  Mean   : 7.393  
##  3rd Qu.: 7.438  
##  Max.   :37.600  
##  NA's   :5       
## Count (and %) of NAs in pH column: 5 (0.01 %)
## Number (%) of `WELL MATCHED`: 64163 (92.54 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -3.1748   0.2595   0.4026   0.4454   0.9866  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  3.396e+00  5.298e-01   6.410 1.45e-10 ***
## NUM_VAL.y    3.877e-02  7.310e-03   5.304 1.14e-07 ***
## pH          -6.129e-03  6.465e-03  -0.948  0.34315    
## Gluc        -6.284e-04  2.324e-04  -2.704  0.00686 ** 
## Bicarb       3.092e-03  2.287e-03   1.352  0.17640    
## iCal        -1.232e-01  1.450e-01  -0.849  0.39579    
## Lactate      5.847e-02  9.802e-03   5.965 2.44e-09 ***
## AGE_PROC    -9.488e-05  7.025e-06 -13.506  < 2e-16 ***
## DEPTPICU    -8.611e-01  4.203e-02 -20.488  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 36803  on 69335  degrees of freedom
## Residual deviance: 35534  on 69327  degrees of freedom
## AIC: 35552
## 
## Number of Fisher Scoring iterations: 6
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
##  29.8409996   1.0395324   0.9938900   0.9993718   1.0030969   0.8841174 
##     Lactate    AGE_PROC    DEPTPICU 
##   1.0602167   0.9999051   0.4227003
## Waiting for profiling to be done...

##                 2.5 %     97.5 %
## (Intercept) 7.5722661 91.9670401
## NUM_VAL.y   1.0247728  1.0545641
## pH          0.9799470  1.0115232
## Gluc        0.9989247  0.9998389
## Bicarb      0.9986293  1.0076231
## iCal        0.6662094  1.1762683
## Lactate     1.0405160  1.0812842
## AGE_PROC    0.9998914  0.9999189
## DEPTPICU    0.3890915  0.4587825
## Pre-Range Check Length: 69336
## Cutoff value used: 7
## X :: Pos: 0.02   Neg: 0.98
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0002
##  P.neg: 0.9737
##  P.obs: 0.99
##  P.exp: 0.97
## Kappa: 0.51
## Pre-Range Check Length: 69336
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.00   Neg: 1.00
##  P.pos: 0.0000
##  P.neg: 0.9981
##  P.obs: 1.00
##  P.exp: 1.00
## Kappa: 0.40
## Pre-Range Check Length: 69336
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.12   Neg: 0.88
##  P.pos: 0.0250
##  P.neg: 0.7001
##  P.obs: 0.91
##  P.exp: 0.73
## Kappa: 0.67
## Total number of input rows: 69336
## Gold Standard:
##  Positive: 1239 (1.79 %)
##  Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 476 (0.6865 %)   FP: 122 (0.1760 %)
## FN: 763 (1.1004 %)   TN: 67975 (98.0371 %)
## Sens: 0.3842
## Spec: 0.9982
## PPV: 0.7960
## NPV: 0.9889
## FOR (1-NPV): 0.0111
## NNM (1/FOR): 90.0891
## 
## Total number of input rows: 69336
## Gold Standard:
##  Positive: 1239 (1.79 %)
##  Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 833 (1.2014 %)   FP: 555 (0.8004 %)
## FN: 406 (0.5856 %)   TN: 67542 (97.4126 %)
## Sens: 0.6723
## Spec: 0.9918
## PPV: 0.6001
## NPV: 0.9940
## FOR (1-NPV): 0.0060
## NNM (1/FOR): 167.3596
## 
## Total number of input rows: 69336
## Gold Standard:
##  Positive: 1239 (1.79 %)
##  Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 986 (1.4221 %)   FP: 1844 (2.6595 %)
## FN: 253 (0.3649 %)   TN: 66253 (95.5535 %)
## Sens: 0.7958
## Spec: 0.9729
## PPV: 0.3484
## NPV: 0.9962
## FOR (1-NPV): 0.0038
## NNM (1/FOR): 262.8696
## 
## Total number of input rows: 69336
## Gold Standard:
##  Positive: 1239 (1.79 %)
##  Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 1057 (1.5245 %)  FP: 4173 (6.0185 %)
## FN: 182 (0.2625 %)   TN: 63924 (92.1945 %)
## Sens: 0.8531
## Spec: 0.9387
## PPV: 0.2021
## NPV: 0.9972
## FOR (1-NPV): 0.0028
## NNM (1/FOR): 352.2308
## 
## Total number of input rows: 69336
## Gold Standard:
##  Positive: 1239 (1.79 %)
##  Negative: 68097 (98.21 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 1099 (1.5850 %)  FP: 7557 (10.8991 %)
## FN: 140 (0.2019 %)   TN: 60540 (87.3139 %)
## Sens: 0.8870
## Spec: 0.8890
## PPV: 0.1270
## NPV: 0.9977
## FOR (1-NPV): 0.0023
## NNM (1/FOR): 433.4286
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.945 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.820 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.154 </td>
##    <td style="text-align:right;"> 8.7 </td>
##    <td style="text-align:right;"> 0.875 </td>
##    <td style="text-align:right;"> 0.911 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.330 </td>
##    <td style="text-align:right;"> 8.9 </td>
##    <td style="text-align:right;"> 0.694 </td>
##    <td style="text-align:right;"> 0.876 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).

## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 5185
## Number of non-duplicated first PROC_NAME rows: 4358
## Number of non-duplicated second PROC_NAME rows: 4332
## Number of paired, simultaneous values: 4332
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 3790 (87.5 %)
##  Box B: 4332 (100.0 %)
##  Box C: 6 (0.1 %)
##  Box D: 11 (0.3 %)
## Counts by Area:
##  Green Area: 3790 (87.49 %)
##  Yellow Area: 525 (12.12 %)
##  Red Area: 17 (0.39 %)

## NUmber of unique PN[1] order procedure keys: 4332
##     Lactate      
##  Min.   : 0.600  
##  1st Qu.: 1.300  
##  Median : 2.200  
##  Mean   : 3.855  
##  3rd Qu.: 4.725  
##  Max.   :19.100  
##  NA's   :4208    
## Count (and %) of NAs in Lactate column: 4208 (97.14 %)

##       Gluc       
##  Min.   : 31.00  
##  1st Qu.: 97.75  
##  Median :124.00  
##  Mean   :143.12  
##  3rd Qu.:176.50  
##  Max.   :440.00  
##  NA's   :4204    
## Count (and %) of NAs in Gluc column: 4204 (97.05 %)

##       iCal      
##  Min.   :0.300  
##  1st Qu.:1.170  
##  Median :1.250  
##  Mean   :1.263  
##  3rd Qu.:1.340  
##  Max.   :2.720  
##  NA's   :8      
## Count (and %) of NAs in iCal column: 8 (0.18 %)

##      Bicarb     
##  Min.   : 1.60  
##  1st Qu.:21.10  
##  Median :24.70  
##  Mean   :25.27  
##  3rd Qu.:29.00  
##  Max.   :58.00  
##  NA's   :37     
## Count (and %) of NAs in Bicarb column: 37 (0.85 %)

##        pH       
##  Min.   :6.661  
##  1st Qu.:7.261  
##  Median :7.344  
##  Mean   :7.329  
##  3rd Qu.:7.409  
##  Max.   :7.884  
##  NA's   :15     
## Count (and %) of NAs in pH column: 15 (0.35 %)
## Number (%) of `WELL MATCHED`: 3317 (76.57 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.1564   0.6335   0.6999   0.7515   1.0173  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept) -6.833e+00  2.344e+00  -2.915 0.003560 ** 
## NUM_VAL.y    2.123e-02  1.298e-02   1.636 0.101870    
## pH           9.856e-02  3.123e-02   3.156 0.001601 ** 
## Gluc         1.405e-03  3.427e-03   0.410 0.681811    
## Bicarb      -2.900e-03  6.084e-03  -0.477 0.633572    
## iCal         1.368e-01  2.012e-01   0.680 0.496576    
## Lactate      3.236e-02  5.704e-02   0.567 0.570518    
## AGE_PROC    -3.447e-06  1.938e-05  -0.178 0.858818    
## DEPTPICU     3.469e-01  9.129e-02   3.800 0.000145 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 4716.9  on 4331  degrees of freedom
## Residual deviance: 4694.4  on 4323  degrees of freedom
## AIC: 4712.4
## 
## Number of Fisher Scoring iterations: 4
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
## 0.001077271 1.021452401 1.103579470 1.001406181 0.997103843 1.146579129 
##     Lactate    AGE_PROC    DEPTPICU 
## 1.032886742 0.999996553 1.414712977
## Waiting for profiling to be done...

##                    2.5 %    97.5 %
## (Intercept) 1.093718e-05 0.1076267
## NUM_VAL.y   9.958650e-01 1.0478402
## pH          1.037872e+00 1.1731021
## Gluc        9.950547e-01 1.0086441
## Bicarb      9.853295e-01 1.0091188
## iCal        7.750386e-01 1.7060039
## Lactate     9.332117e-01 1.1754097
## AGE_PROC    9.999588e-01 1.0000348
## DEPTPICU    1.182780e+00 1.6918473
## Pre-Range Check Length: 4332
## Cutoff value used: 7
## X :: Pos: 0.04   Neg: 0.96
## Y :: Pos: 0.06   Neg: 0.94
##  P.pos: 0.0022
##  P.neg: 0.9068
##  P.obs: 0.96
##  P.exp: 0.91
## Kappa: 0.52
## Pre-Range Check Length: 4332
## Cutoff value used: 5
## X :: Pos: 0.01   Neg: 0.99
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0000
##  P.neg: 0.9880
##  P.obs: 0.99
##  P.exp: 0.99
## Kappa: 0.23
## Pre-Range Check Length: 4332
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.22   Neg: 0.78
##  P.pos: 0.0432
##  P.neg: 0.6270
##  P.obs: 0.89
##  P.exp: 0.67
## Kappa: 0.67
## Total number of input rows: 4332
## Gold Standard:
##  Positive: 165 (3.81 %)
##  Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 112 (2.5854 %)   FP: 136 (3.1394 %)
## FN: 53 (1.2235 %)    TN: 4031 (93.0517 %)
## Sens: 0.6788
## Spec: 0.9674
## PPV: 0.4516
## NPV: 0.9870
## FOR (1-NPV): 0.0130
## NNM (1/FOR): 77.0566
## 
## Total number of input rows: 4332
## Gold Standard:
##  Positive: 165 (3.81 %)
##  Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 118 (2.7239 %)   FP: 197 (4.5476 %)
## FN: 47 (1.0849 %)    TN: 3970 (91.6436 %)
## Sens: 0.7152
## Spec: 0.9527
## PPV: 0.3746
## NPV: 0.9883
## FOR (1-NPV): 0.0117
## NNM (1/FOR): 85.4681
## 
## Total number of input rows: 4332
## Gold Standard:
##  Positive: 165 (3.81 %)
##  Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 134 (3.0933 %)   FP: 387 (8.9335 %)
## FN: 31 (0.7156 %)    TN: 3780 (87.2576 %)
## Sens: 0.8121
## Spec: 0.9071
## PPV: 0.2572
## NPV: 0.9919
## FOR (1-NPV): 0.0081
## NNM (1/FOR): 122.9355
## 
## Total number of input rows: 4332
## Gold Standard:
##  Positive: 165 (3.81 %)
##  Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 137 (3.1625 %)   FP: 503 (11.6113 %)
## FN: 28 (0.6464 %)    TN: 3664 (84.5799 %)
## Sens: 0.8303
## Spec: 0.8793
## PPV: 0.2141
## NPV: 0.9924
## FOR (1-NPV): 0.0076
## NNM (1/FOR): 131.8571
## 
## Total number of input rows: 4332
## Gold Standard:
##  Positive: 165 (3.81 %)
##  Negative: 4167 (96.19 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 147 (3.3934 %)   FP: 804 (18.5596 %)
## FN: 18 (0.4155 %)    TN: 3363 (77.6316 %)
## Sens: 0.8909
## Spec: 0.8071
## PPV: 0.1546
## NPV: 0.9947
## FOR (1-NPV): 0.0053
## NNM (1/FOR): 187.8333
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.927 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.829 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.206 </td>
##    <td style="text-align:right;"> 8.5 </td>
##    <td style="text-align:right;"> 0.867 </td>
##    <td style="text-align:right;"> 0.842 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.348 </td>
##    <td style="text-align:right;"> 8.8 </td>
##    <td style="text-align:right;"> 0.727 </td>
##    <td style="text-align:right;"> 0.783 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).

## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 90493
## Number of non-duplicated first PROC_NAME rows: 74478
## Number of non-duplicated second PROC_NAME rows: 73790
## Number of paired, simultaneous values: 73790
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 69643 (94.4 %)
##  Box B: 73790 (100.0 %)
##  Box C: 59 (0.1 %)
##  Box D: 51 (0.1 %)
## Counts by Area:
##  Green Area: 69643 (94.38 %)
##  Yellow Area: 4037 (5.47 %)
##  Red Area: 110 (0.15 %)

## NUmber of unique PN[1] order procedure keys: 73790
##     Lactate      
##  Min.   : 0.500  
##  1st Qu.: 1.100  
##  Median : 1.500  
##  Mean   : 2.101  
##  3rd Qu.: 2.200  
##  Max.   :27.200  
##  NA's   :19066   
## Count (and %) of NAs in Lactate column: 19066 (25.84 %)

##       Gluc       
##  Min.   :  10.0  
##  1st Qu.:  97.0  
##  Median : 115.0  
##  Mean   : 127.9  
##  3rd Qu.: 141.0  
##  Max.   :1849.0  
##  NA's   :5686    
## Count (and %) of NAs in Gluc column: 5686 (7.71 %)

##       iCal      
##  Min.   :0.270  
##  1st Qu.:1.110  
##  Median :1.170  
##  Mean   :1.177  
##  3rd Qu.:1.230  
##  Max.   :2.740  
##  NA's   :5667   
## Count (and %) of NAs in iCal column: 5667 (7.68 %)

##      Bicarb     
##  Min.   : 1.70  
##  1st Qu.:23.80  
##  Median :27.30  
##  Mean   :28.05  
##  3rd Qu.:31.60  
##  Max.   :82.60  
##  NA's   :3      
## Count (and %) of NAs in Bicarb column: 3 (0.00 %)

##        pH       
##  Min.   :6.621  
##  1st Qu.:7.354  
##  Median :7.398  
##  Mean   :7.393  
##  3rd Qu.:7.439  
##  Max.   :7.899  
##  NA's   :4      
## Count (and %) of NAs in pH column: 4 (0.01 %)
## Number (%) of `WELL MATCHED`: 67598 (91.61 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -2.5980   0.3082   0.4190   0.4644   1.0330  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  5.196e+00  1.461e+00   3.557 0.000376 ***
## NUM_VAL.y    2.850e-02  6.490e-03   4.391 1.13e-05 ***
## pH          -2.639e-02  1.926e-02  -1.370 0.170597    
## Gluc        -1.053e-03  2.166e-04  -4.862 1.16e-06 ***
## Bicarb       7.659e-03  2.200e-03   3.482 0.000498 ***
## iCal        -5.216e-01  1.301e-01  -4.010 6.07e-05 ***
## Lactate     -2.545e-02  6.378e-03  -3.990 6.60e-05 ***
## AGE_PROC    -7.473e-05  6.574e-06 -11.367  < 2e-16 ***
## DEPTPICU    -6.281e-01  3.632e-02 -17.293  < 2e-16 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 42536  on 73789  degrees of freedom
## Residual deviance: 41558  on 73781  degrees of freedom
## AIC: 41576
## 
## Number of Fisher Scoring iterations: 5
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
## 180.6280846   1.0289083   0.9739565   0.9989474   1.0076882   0.5935526 
##     Lactate    AGE_PROC    DEPTPICU 
##   0.9748693   0.9999253   0.5336092
## Waiting for profiling to be done...

##                  2.5 %       97.5 %
## (Intercept) 10.3802637 3187.7327061
## NUM_VAL.y    1.0159252    1.0421022
## pH           0.9377869    1.0113209
## Gluc         0.9985288    0.9993779
## Bicarb       1.0033720    1.0120617
## iCal         0.4604422    0.7667025
## Lactate      0.9629330    0.9873234
## AGE_PROC     0.9999124    0.9999382
## DEPTPICU     0.4968084    0.5728326
## Pre-Range Check Length: 73790
## Cutoff value used: 7
## X :: Pos: 0.02   Neg: 0.98
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0002
##  P.neg: 0.9725
##  P.obs: 0.99
##  P.exp: 0.97
## Kappa: 0.48
## Pre-Range Check Length: 73790
## Cutoff value used: 5
## X :: Pos: 0.00   Neg: 1.00
## Y :: Pos: 0.00   Neg: 1.00
##  P.pos: 0.0000
##  P.neg: 0.9980
##  P.obs: 1.00
##  P.exp: 1.00
## Kappa: 0.30
## Pre-Range Check Length: 73790
## Cutoff value used: 9
## X :: Pos: 0.20   Neg: 0.80
## Y :: Pos: 0.13   Neg: 0.87
##  P.pos: 0.0249
##  P.neg: 0.7006
##  P.obs: 0.91
##  P.exp: 0.73
## Kappa: 0.66
## Total number of input rows: 73790
## Gold Standard:
##  Positive: 1357 (1.84 %)
##  Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 501 (0.6790 %)   FP: 185 (0.2507 %)
## FN: 856 (1.1600 %)   TN: 72248 (97.9103 %)
## Sens: 0.3692
## Spec: 0.9974
## PPV: 0.7303
## NPV: 0.9883
## FOR (1-NPV): 0.0117
## NNM (1/FOR): 85.4019
## 
## Total number of input rows: 73790
## Gold Standard:
##  Positive: 1357 (1.84 %)
##  Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 873 (1.1831 %)   FP: 684 (0.9270 %)
## FN: 484 (0.6559 %)   TN: 71749 (97.2340 %)
## Sens: 0.6433
## Spec: 0.9906
## PPV: 0.5607
## NPV: 0.9933
## FOR (1-NPV): 0.0067
## NNM (1/FOR): 149.2417
## 
## Total number of input rows: 73790
## Gold Standard:
##  Positive: 1357 (1.84 %)
##  Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 1045 (1.4162 %)  FP: 2073 (2.8093 %)
## FN: 312 (0.4228 %)   TN: 70360 (95.3517 %)
## Sens: 0.7701
## Spec: 0.9714
## PPV: 0.3352
## NPV: 0.9956
## FOR (1-NPV): 0.0044
## NNM (1/FOR): 226.5128
## 
## Total number of input rows: 73790
## Gold Standard:
##  Positive: 1357 (1.84 %)
##  Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 1130 (1.5314 %)  FP: 4507 (6.1079 %)
## FN: 227 (0.3076 %)   TN: 67926 (92.0531 %)
## Sens: 0.8327
## Spec: 0.9378
## PPV: 0.2005
## NPV: 0.9967
## FOR (1-NPV): 0.0033
## NNM (1/FOR): 300.2335
## 
## Total number of input rows: 73790
## Gold Standard:
##  Positive: 1357 (1.84 %)
##  Negative: 72433 (98.16 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 1181 (1.6005 %)  FP: 8057 (10.9188 %)
## FN: 176 (0.2385 %)   TN: 64376 (87.2422 %)
## Sens: 0.8703
## Spec: 0.8888
## PPV: 0.1278
## NPV: 0.9973
## FOR (1-NPV): 0.0027
## NNM (1/FOR): 366.7727
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.937 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.816 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.168 </td>
##    <td style="text-align:right;"> 8.7 </td>
##    <td style="text-align:right;"> 0.858 </td>
##    <td style="text-align:right;"> 0.910 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.346 </td>
##    <td style="text-align:right;"> 9.4 </td>
##    <td style="text-align:right;"> 0.710 </td>
##    <td style="text-align:right;"> 0.812 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 560 rows containing missing values (geom_point).
## Warning: Removed 560 row(s) containing missing values (geom_path).

## Number of component numeric rows in input data frame: 461758
## Number of paired, simultaneous values meeting cutoff: 11795
## Number of non-duplicated first PROC_NAME rows: 7590
## Number of non-duplicated second PROC_NAME rows: 7017
## Number of paired, simultaneous values: 7017
## Number of duplicated ORDER_PROC_KEY.x values: 0
## Counts by Box:
##  Box A: 6080 (86.6 %)
##  Box B: 7016 (100.0 %)
##  Box C: 16 (0.2 %)
##  Box D: 32 (0.5 %)
## Counts by Area:
##  Green Area: 6080 (86.65 %)
##  Yellow Area: 888 (12.65 %)
##  Red Area: 48 (0.68 %)

## NUmber of unique PN[1] order procedure keys: 7017
##     Lactate      
##  Min.   : 0.600  
##  1st Qu.: 1.300  
##  Median : 2.100  
##  Mean   : 3.832  
##  3rd Qu.: 3.975  
##  Max.   :19.100  
##  NA's   :6891    
## Count (and %) of NAs in Lactate column: 6891 (98.20 %)

##       Gluc      
##  Min.   : 31.0  
##  1st Qu.:105.5  
##  Median :136.0  
##  Mean   :151.3  
##  3rd Qu.:183.0  
##  Max.   :501.0  
##  NA's   :6886   
## Count (and %) of NAs in Gluc column: 6886 (98.13 %)

##       iCal      
##  Min.   :0.300  
##  1st Qu.:1.180  
##  Median :1.260  
##  Mean   :1.272  
##  3rd Qu.:1.350  
##  Max.   :2.500  
##  NA's   :19     
## Count (and %) of NAs in iCal column: 19 (0.27 %)

##      Bicarb     
##  Min.   : 1.60  
##  1st Qu.:21.90  
##  Median :25.70  
##  Mean   :26.01  
##  3rd Qu.:29.80  
##  Max.   :63.40  
##  NA's   :42     
## Count (and %) of NAs in Bicarb column: 42 (0.60 %)

##        pH       
##  Min.   :6.739  
##  1st Qu.:7.285  
##  Median :7.358  
##  Mean   :7.349  
##  3rd Qu.:7.422  
##  Max.   :7.734  
##  NA's   :16     
## Count (and %) of NAs in pH column: 16 (0.23 %)
## Number (%) of `WELL MATCHED`: 4825 (68.76 %)
## 
## Call:
## glm(formula = WELL_MATCHED ~ NUM_VAL.y + pH + Gluc + Bicarb + 
##     iCal + Lactate + AGE_PROC + DEPT, family = "binomial", data = thresh.df %>% 
##     dplyr::mutate(pH = pH * 10))
## 
## Deviance Residuals: 
##     Min       1Q   Median       3Q      Max  
## -1.8197  -1.4375   0.8212   0.8978   1.0222  
## 
## Coefficients:
##               Estimate Std. Error z value Pr(>|z|)    
## (Intercept)  4.759e-01  1.865e+00   0.255 0.798585    
## NUM_VAL.y   -1.125e-02  9.609e-03  -1.171 0.241533    
## pH           8.349e-04  2.457e-02   0.034 0.972890    
## Gluc        -1.881e-03  3.007e-03  -0.626 0.531633    
## Bicarb       3.330e-03  4.513e-03   0.738 0.460599    
## iCal         1.925e-01  1.543e-01   1.248 0.212158    
## Lactate      5.967e-02  5.688e-02   1.049 0.294176    
## AGE_PROC     3.983e-05  1.453e-05   2.741 0.006128 ** 
## DEPTPICU     2.250e-01  6.707e-02   3.355 0.000793 ***
## ---
## Signif. codes:  0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## 
## (Dispersion parameter for binomial family taken to be 1)
## 
##     Null deviance: 8715.0  on 7016  degrees of freedom
## Residual deviance: 8670.4  on 7008  degrees of freedom
## AIC: 8688.4
## 
## Number of Fisher Scoring iterations: 4
## 
## (Intercept)   NUM_VAL.y          pH        Gluc      Bicarb        iCal 
##   1.6095155   0.9888090   1.0008353   0.9981211   1.0033357   1.2122396 
##     Lactate    AGE_PROC    DEPTPICU 
##   1.0614825   1.0000398   1.2523838
## Waiting for profiling to be done...

##                  2.5 %    97.5 %
## (Intercept) 0.04180621 62.676110
## NUM_VAL.y   0.97036823  1.007623
## pH          0.95368861  1.050121
## Gluc        0.99224111  1.004139
## Bicarb      0.99452564  1.012281
## iCal        0.89698985  1.642481
## Lactate     0.95883488  1.203738
## AGE_PROC    1.00001147  1.000068
## DEPTPICU    1.09811576  1.428402
## Pre-Range Check Length: 7017
## Cutoff value used: 7
## X :: Pos: 0.03   Neg: 0.97
## Y :: Pos: 0.04   Neg: 0.96
##  P.pos: 0.0014
##  P.neg: 0.9246
##  P.obs: 0.96
##  P.exp: 0.93
## Kappa: 0.45
## Pre-Range Check Length: 7017
## Cutoff value used: 5
## X :: Pos: 0.01   Neg: 0.99
## Y :: Pos: 0.01   Neg: 0.99
##  P.pos: 0.0000
##  P.neg: 0.9892
##  P.obs: 0.99
##  P.exp: 0.99
## Kappa: 0.21
## Pre-Range Check Length: 7017
## Cutoff value used: 9
## X :: Pos: 0.17   Neg: 0.83
## Y :: Pos: 0.19   Neg: 0.81
##  P.pos: 0.0338
##  P.neg: 0.6655
##  P.obs: 0.88
##  P.exp: 0.70
## Kappa: 0.61
## Total number of input rows: 7017
## Gold Standard:
##  Positive: 228 (3.25 %)
##  Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.0
## TP: 127 (1.8099 %)   FP: 184 (2.6222 %)
## FN: 101 (1.4394 %)   TN: 6605 (94.1285 %)
## Sens: 0.5570
## Spec: 0.9729
## PPV: 0.4084
## NPV: 0.9849
## FOR (1-NPV): 0.0151
## NNM (1/FOR): 66.3960
## 
## Total number of input rows: 7017
## Gold Standard:
##  Positive: 228 (3.25 %)
##  Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 7.5
## TP: 134 (1.9096 %)   FP: 278 (3.9618 %)
## FN: 94 (1.3396 %)    TN: 6511 (92.7889 %)
## Sens: 0.5877
## Spec: 0.9591
## PPV: 0.3252
## NPV: 0.9858
## FOR (1-NPV): 0.0142
## NNM (1/FOR): 70.2660
## 
## Total number of input rows: 7017
## Gold Standard:
##  Positive: 228 (3.25 %)
##  Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.0
## TP: 158 (2.2517 %)   FP: 561 (7.9949 %)
## FN: 70 (0.9976 %)    TN: 6228 (88.7559 %)
## Sens: 0.6930
## Spec: 0.9174
## PPV: 0.2197
## NPV: 0.9889
## FOR (1-NPV): 0.0111
## NNM (1/FOR): 89.9714
## 
## Total number of input rows: 7017
## Gold Standard:
##  Positive: 228 (3.25 %)
##  Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 8.5
## TP: 172 (2.4512 %)   FP: 742 (10.5743 %)
## FN: 56 (0.7981 %)    TN: 6047 (86.1764 %)
## Sens: 0.7544
## Spec: 0.8907
## PPV: 0.1882
## NPV: 0.9908
## FOR (1-NPV): 0.0092
## NNM (1/FOR): 108.9821
## 
## Total number of input rows: 7017
## Gold Standard:
##  Positive: 228 (3.25 %)
##  Negative: 6789 (96.75 %)
## Cutoffs: PN[1]: 7.0  PN[2]: 9.0
## TP: 184 (2.6222 %)   FP: 1183 (16.8591 %)
## FN: 44 (0.6270 %)    TN: 5606 (79.8917 %)
## Sens: 0.8070
## Spec: 0.8257
## PPV: 0.1346
## NPV: 0.9922
## FOR (1-NPV): 0.0078
## NNM (1/FOR): 128.4091
## 
## <table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> AUROC </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.886 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.816 </td>
##   </tr>
## </tbody>
## </table><table class=" lightable-paper lightable-hover" style='font-family: "Arial Narrow", arial, helvetica, sans-serif; margin-left: auto; margin-right: auto;'>
##  <thead>
##   <tr>
##    <th style="text-align:left;"> CBC Cutoff </th>
##    <th style="text-align:right;"> Distance </th>
##    <th style="text-align:right;"> Cutoff </th>
##    <th style="text-align:right;"> Sens </th>
##    <th style="text-align:right;"> Spec </th>
##   </tr>
##  </thead>
## <tbody>
##   <tr>
##    <td style="text-align:left;"> High (7.0 g/dL) </td>
##    <td style="text-align:right;"> 0.260 </td>
##    <td style="text-align:right;"> 8.8 </td>
##    <td style="text-align:right;"> 0.807 </td>
##    <td style="text-align:right;"> 0.826 </td>
##   </tr>
##   <tr>
##    <td style="text-align:left;"> Low (5.0 g/dL) </td>
##    <td style="text-align:right;"> 0.347 </td>
##    <td style="text-align:right;"> 8.8 </td>
##    <td style="text-align:right;"> 0.711 </td>
##    <td style="text-align:right;"> 0.808 </td>
##   </tr>
## </tbody>
## </table>

## Warning: Removed 682 rows containing missing values (geom_point).
## Warning: Removed 682 row(s) containing missing values (geom_path).